""" Placeholder Catalog Generator Generates comprehensive documentation for all placeholders: 1. PLACEHOLDER_CATALOG_EXTENDED.json - Machine-readable full metadata 2. PLACEHOLDER_CATALOG_EXTENDED.md - Human-readable catalog 3. PLACEHOLDER_GAP_REPORT.md - Technical gaps and issues 4. PLACEHOLDER_EXPORT_SPEC.md - Export format specification This implements the normative standard for placeholder documentation. """ import sys import json from pathlib import Path from datetime import datetime from typing import Dict, List, Any # Add backend to path sys.path.insert(0, str(Path(__file__).parent)) from placeholder_metadata import ( PlaceholderMetadata, PlaceholderType, TimeWindow, OutputType, METADATA_REGISTRY ) from placeholder_metadata_extractor import build_complete_metadata_registry from generate_complete_metadata import apply_manual_corrections, generate_gap_report # ── 1. JSON Catalog ─────────────────────────────────────────────────────────── def generate_json_catalog(registry, output_dir: Path): """Generate PLACEHOLDER_CATALOG_EXTENDED.json""" all_metadata = registry.get_all() catalog = { "schema_version": "1.0.0", "generated_at": datetime.now().isoformat(), "normative_standard": "PLACEHOLDER_METADATA_REQUIREMENTS_V2_NORMATIVE.md", "total_placeholders": len(all_metadata), "placeholders": {} } for key, metadata in sorted(all_metadata.items()): catalog["placeholders"][key] = metadata.to_dict() output_path = output_dir / "PLACEHOLDER_CATALOG_EXTENDED.json" with open(output_path, 'w', encoding='utf-8') as f: json.dump(catalog, f, indent=2, ensure_ascii=False) print(f"Generated: {output_path}") return output_path # ── 2. 
# ── 2. Markdown Catalog ───────────────────────────────────────────────────────

def generate_markdown_catalog(registry, output_dir: Path):
    """Generate PLACEHOLDER_CATALOG_EXTENDED.md

    Renders a human-readable catalog: summary statistics (counts by type
    and by category) followed by a detailed section per placeholder.
    Returns the path of the written file.
    """
    entries = registry.get_all()
    categories = registry.get_by_category()

    lines = [
        "# Placeholder Catalog (Extended)",
        "",
        f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"**Total Placeholders:** {len(entries)}",
        "**Normative Standard:** PLACEHOLDER_METADATA_REQUIREMENTS_V2_NORMATIVE.md",
        "",
        "---",
        "",
        "## Summary Statistics",
        "",
    ]

    # Tally how many placeholders fall into each type.
    type_counts = {}
    for meta in entries.values():
        label = meta.type.value
        type_counts[label] = type_counts.get(label, 0) + 1

    lines += ["### By Type", "", "| Type | Count | Percentage |", "|------|-------|------------|"]
    for label, count in sorted(type_counts.items()):
        share = count / len(entries) * 100
        lines.append(f"| {label} | {count} | {share:.1f}% |")
    lines.append("")

    lines += ["### By Category", "", "| Category | Count |", "|----------|-------|"]
    for category, members in sorted(categories.items()):
        lines.append(f"| {category} | {len(members)} |")
    lines += ["", "---", "", "## Detailed Placeholder Catalog", ""]

    for category, members in sorted(categories.items()):
        lines += [f"### {category} ({len(members)} placeholders)", ""]
        # Within a category, placeholders are listed alphabetically by key.
        for meta in sorted(members, key=lambda m: m.key):
            lines += [
                f"#### `{{{{{meta.key}}}}}`",
                "",
                f"**Description:** {meta.description}",
                "",
                f"**Semantic Contract:** {meta.semantic_contract}",
                "",
                "| Property | Value |",
                "|----------|-------|",
                f"| Type | `{meta.type.value}` |",
                f"| Time Window | `{meta.time_window.value}` |",
                f"| Output Type | `{meta.output_type.value}` |",
                f"| Unit | {meta.unit or 'None'} |",
                f"| Format Hint | {meta.format_hint or 'None'} |",
                f"| Version | {meta.version} |",
                f"| Deprecated | {meta.deprecated} |",
                "",
                "**Source:**",
                f"- Resolver: `{meta.source.resolver}`",
                f"- Module: `{meta.source.module}`",
            ]
            # Optional provenance details are emitted only when present.
            if meta.source.function:
                lines.append(f"- Function: `{meta.source.function}`")
            if meta.source.data_layer_module:
                lines.append(f"- Data Layer: `{meta.source.data_layer_module}`")
            if meta.source.source_tables:
                tables = ", ".join(f"`{t}`" for t in meta.source.source_tables)
                lines.append(f"- Tables: {tables}")
            lines.append("")

            if meta.known_issues:
                lines.append("**Known Issues:**")
                lines += [f"- {issue}" for issue in meta.known_issues]
                lines.append("")
            if meta.notes:
                lines.append("**Notes:**")
                lines += [f"- {note}" for note in meta.notes]
                lines.append("")
            # Horizontal rule separates consecutive placeholder entries.
            lines += ["---", ""]

    target = output_dir / "PLACEHOLDER_CATALOG_EXTENDED.md"
    with target.open('w', encoding='utf-8') as fh:
        fh.write("\n".join(lines))

    print(f"Generated: {target}")
    return target
# ── 3. Gap Report ─────────────────────────────────────────────────────────────

def generate_gap_report_md(registry, gaps: Dict, output_dir: Path):
    """Generate PLACEHOLDER_GAP_REPORT.md

    Args:
        registry: Metadata registry providing ``get_all()`` and ``get(key)``.
        gaps: Mapping of gap type (e.g. ``unknown_time_window``) to the list
            of placeholder keys exhibiting that gap.
        output_dir: Directory the report is written into.

    Returns:
        Path of the written report file.
    """
    all_metadata = registry.get_all()
    total = len(all_metadata)

    md = []
    md.append("# Placeholder Metadata Gap Report")
    md.append("")
    md.append(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    md.append(f"**Total Placeholders:** {total}")
    md.append("")
    md.append("This report identifies placeholders with incomplete or unresolved metadata fields.")
    md.append("")
    md.append("---")
    md.append("")

    # Summary
    gap_count = sum(len(v) for v in gaps.values())
    # Guard against an empty registry: with no placeholders there is nothing
    # to cover, so report 100% instead of dividing by zero.
    if total:
        coverage = (1 - gap_count / (total * 6)) * 100  # 6 gap types
    else:
        coverage = 100.0
    md.append("## Summary")
    md.append("")
    md.append(f"- **Total Gap Instances:** {gap_count}")
    md.append(f"- **Metadata Coverage:** {coverage:.1f}%")
    md.append("")

    # Detailed Gaps
    md.append("## Detailed Gap Analysis")
    md.append("")
    for gap_type, placeholders in sorted(gaps.items()):
        if not placeholders:
            continue  # skip gap types with no offenders
        md.append(f"### {gap_type.replace('_', ' ').title()}")
        md.append("")
        md.append(f"**Count:** {len(placeholders)}")
        md.append("")
        # Group the offending placeholders by their category for readability.
        by_cat = {}
        for key in placeholders:
            metadata = registry.get(key)
            if metadata:
                by_cat.setdefault(metadata.category, []).append(key)
        for category, keys in sorted(by_cat.items()):
            md.append(f"#### {category}")
            md.append("")
            for key in sorted(keys):
                md.append(f"- `{{{{{key}}}}}`")
            md.append("")

    # Recommendations — only emitted for gap types actually present.
    md.append("---")
    md.append("")
    md.append("## Recommendations")
    md.append("")
    if gaps.get('unknown_time_window'):
        md.append("### Time Window Resolution")
        md.append("")
        md.append("Placeholders with unknown time windows should be analyzed to determine:")
        md.append("- Whether they use `latest`, `7d`, `28d`, `30d`, `90d`, or `custom`")
        md.append("- Document in semantic_contract if time window is variable")
        md.append("")
    if gaps.get('legacy_unknown_type'):
        md.append("### Type Classification")
        md.append("")
        md.append("Placeholders with `legacy_unknown` type should be classified as:")
        md.append("- `atomic` - Single atomic value")
        md.append("- `raw_data` - Structured raw data (JSON, lists)")
        md.append("- `interpreted` - AI-interpreted or derived values")
        md.append("")
    if gaps.get('missing_data_layer_module'):
        md.append("### Data Layer Tracking")
        md.append("")
        md.append("Placeholders without data_layer_module should be investigated:")
        md.append("- Check if they call data_layer functions")
        md.append("- Document direct database access if no data_layer function exists")
        md.append("")

    output_path = output_dir / "PLACEHOLDER_GAP_REPORT.md"
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(md))

    print(f"Generated: {output_path}")
    return output_path
# ── 4. Export Spec ────────────────────────────────────────────────────────────

def generate_export_spec_md(output_dir: Path):
    """Generate PLACEHOLDER_EXPORT_SPEC.md

    Emits the (mostly static) specification of the legacy and extended
    placeholder export endpoints into *output_dir* and returns the path
    of the written file. Only the generation timestamp varies per run.
    """
    content = [
        "# Placeholder Export Specification",
        "",
        "**Version:** 1.0.0",
        f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "**Normative Standard:** PLACEHOLDER_METADATA_REQUIREMENTS_V2_NORMATIVE.md",
        "",
        "---",
        "",
        # Overview
        "## Overview",
        "",
        "The Placeholder Export API provides two endpoints:",
        "",
        "1. **Legacy Export** (`/api/prompts/placeholders/export-values`)",
        " - Backward-compatible format",
        " - Simple key-value pairs",
        " - Organized by category",
        "",
        "2. **Extended Export** (`/api/prompts/placeholders/export-values-extended`)",
        " - Complete normative metadata",
        " - Runtime value resolution",
        " - Gap analysis",
        " - Validation results",
        "",
        # Extended Export Format
        "## Extended Export Format",
        "",
        "### Root Structure",
        "",
        "```json",
        "{",
        ' "schema_version": "1.0.0",',
        ' "export_date": "2026-03-29T12:00:00Z",',
        ' "profile_id": "user-123",',
        ' "legacy": { ... },',
        ' "metadata": { ... },',
        ' "validation": { ... }',
        "}",
        "```",
        "",
        # Legacy Section
        "### Legacy Section",
        "",
        "Maintains backward compatibility with existing export consumers.",
        "",
        "```json",
        '"legacy": {',
        ' "all_placeholders": {',
        ' "weight_aktuell": "85.8 kg",',
        ' "name": "Max Mustermann",',
        ' ...',
        ' },',
        ' "placeholders_by_category": {',
        ' "Körper": [',
        ' {',
        ' "key": "{{weight_aktuell}}",',
        ' "description": "Aktuelles Gewicht in kg",',
        ' "value": "85.8 kg",',
        ' "example": "85.8 kg"',
        ' },',
        ' ...',
        ' ],',
        ' ...',
        ' },',
        ' "count": 116',
        '}',
        "```",
        "",
        # Metadata Section
        "### Metadata Section",
        "",
        "Complete normative metadata for all placeholders.",
        "",
        "```json",
        '"metadata": {',
        ' "flat": [',
        ' {',
        ' "key": "weight_aktuell",',
        ' "placeholder": "{{weight_aktuell}}",',
        ' "category": "Körper",',
        ' "type": "atomic",',
        ' "description": "Aktuelles Gewicht in kg",',
        ' "semantic_contract": "Letzter verfügbarer Gewichtseintrag...",',
        ' "unit": "kg",',
        ' "time_window": "latest",',
        ' "output_type": "number",',
        ' "format_hint": "85.8 kg",',
        ' "value_display": "85.8 kg",',
        ' "value_raw": 85.8,',
        ' "available": true,',
        ' "source": {',
        ' "resolver": "get_latest_weight",',
        ' "module": "placeholder_resolver.py",',
        ' "function": "get_latest_weight_data",',
        ' "data_layer_module": "body_metrics",',
        ' "source_tables": ["weight_log"]',
        ' },',
        ' ...',
        ' },',
        ' ...',
        ' ],',
        ' "by_category": { ... },',
        ' "summary": {',
        ' "total_placeholders": 116,',
        ' "available": 98,',
        ' "missing": 18,',
        ' "by_type": {',
        ' "atomic": 85,',
        ' "interpreted": 20,',
        ' "raw_data": 8,',
        ' "legacy_unknown": 3',
        ' },',
        ' "coverage": {',
        ' "fully_resolved": 75,',
        ' "partially_resolved": 30,',
        ' "unresolved": 11',
        ' }',
        ' },',
        ' "gaps": {',
        ' "unknown_time_window": ["placeholder1", ...],',
        ' "missing_semantic_contract": [...],',
        ' ...',
        ' }',
        '}',
        "```",
        "",
        # Validation Section
        "### Validation Section",
        "",
        "Results of normative standard validation.",
        "",
        "```json",
        '"validation": {',
        ' "compliant": 89,',
        ' "non_compliant": 27,',
        ' "issues": [',
        ' {',
        ' "placeholder": "activity_summary",',
        ' "violations": [',
        ' {',
        ' "field": "time_window",',
        ' "issue": "Time window UNKNOWN should be resolved",',
        ' "severity": "warning"',
        ' }',
        ' ]',
        ' },',
        ' ...',
        ' ]',
        '}',
        "```",
        "",
        # Usage
        "## API Usage",
        "",
        "### Legacy Export",
        "",
        "```bash",
        "GET /api/prompts/placeholders/export-values",
        "Header: X-Auth-Token: ",
        "```",
        "",
        "### Extended Export",
        "",
        "```bash",
        "GET /api/prompts/placeholders/export-values-extended",
        "Header: X-Auth-Token: ",
        "```",
        "",
        # Standards Compliance
        "## Standards Compliance",
        "",
        "The extended export implements the following normative requirements:",
        "",
        "1. **Non-Breaking:** Legacy export remains unchanged",
        "2. **Complete Metadata:** All fields from normative standard",
        "3. **Runtime Resolution:** Values resolved for current profile",
        "4. **Gap Transparency:** Unresolved fields explicitly marked",
        "5. **Validation:** Automated compliance checking",
        "6. **Versioning:** Schema version for future evolution",
        "",
    ]

    target = output_dir / "PLACEHOLDER_EXPORT_SPEC.md"
    with target.open('w', encoding='utf-8') as fh:
        fh.write("\n".join(content))

    print(f"Generated: {target}")
    return target
# ── Main ──────────────────────────────────────────────────────────────────────

def main():
    """Main catalog generation function.

    Builds the metadata registry, analyzes gaps, and writes all four
    documentation files. Returns 0 on success, 1 on any failure.
    """
    banner = "=" * 60
    print(banner)
    print("PLACEHOLDER CATALOG GENERATOR")
    print(banner)
    print()

    # All documents land in the docs/ directory next to this backend folder.
    output_dir = Path(__file__).parent.parent / "docs"
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"Output directory: {output_dir}")
    print()

    try:
        print("Building metadata registry...")
        registry = apply_manual_corrections(build_complete_metadata_registry())
        print(f"Loaded {registry.count()} placeholders")
        print()

        print("Analyzing gaps...")
        gaps = generate_gap_report(registry)
        print()

        print("Generating documentation files...")
        print()
        generate_json_catalog(registry, output_dir)
        generate_markdown_catalog(registry, output_dir)
        generate_gap_report_md(registry, gaps, output_dir)
        generate_export_spec_md(output_dir)

        print()
        print(banner)
        print("CATALOG GENERATION COMPLETE")
        print(banner)
        print()
        print("Generated files:")
        generated = [
            "PLACEHOLDER_CATALOG_EXTENDED.json",
            "PLACEHOLDER_CATALOG_EXTENDED.md",
            "PLACEHOLDER_GAP_REPORT.md",
            "PLACEHOLDER_EXPORT_SPEC.md",
        ]
        for idx, fname in enumerate(generated, start=1):
            print(f" {idx}. {output_dir}/{fname}")
        print()
        return 0
    except Exception as exc:
        # Best-effort CLI error reporting; non-zero exit signals failure.
        print()
        print(f"ERROR: {exc}")
        import traceback
        traceback.print_exc()
        return 1


if __name__ == "__main__":
    sys.exit(main())