Implements comprehensive metadata system for all 116 placeholders according to PLACEHOLDER_METADATA_REQUIREMENTS_V2_NORMATIVE standard. Backend: - placeholder_metadata.py: Complete schema (PlaceholderMetadata, Registry, Validation) - placeholder_metadata_extractor.py: Automatic extraction with heuristics - placeholder_metadata_complete.py: Hand-curated metadata for all 116 placeholders - generate_complete_metadata.py: Metadata generation with manual corrections - generate_placeholder_catalog.py: Documentation generator (4 output files) - routers/prompts.py: New extended export endpoint (non-breaking) - tests/test_placeholder_metadata.py: Comprehensive test suite Documentation: - PLACEHOLDER_GOVERNANCE.md: Mandatory governance guidelines - PLACEHOLDER_METADATA_IMPLEMENTATION_SUMMARY.md: Complete implementation docs Features: - Normative compliant metadata for all 116 placeholders - Non-breaking extended export API endpoint - Automatic + manual metadata curation - Validation framework with error/warning levels - Gap reporting for unresolved fields - Catalog generator (JSON, Markdown, Gap Report, Export Spec) - Test suite (20+ tests) - Governance rules for future placeholders API: - GET /api/prompts/placeholders/export-values-extended (NEW) - GET /api/prompts/placeholders/export-values (unchanged, backward compatible) Architecture: - PlaceholderType enum: atomic, raw_data, interpreted, legacy_unknown - TimeWindow enum: latest, 7d, 14d, 28d, 30d, 90d, custom, mixed, unknown - OutputType enum: string, number, integer, boolean, json, markdown, date, enum - Complete source tracking (resolver, data_layer, tables) - Runtime value resolution - Usage tracking (prompts, pipelines, charts) Statistics: - 6 new Python modules (~2500+ lines) - 1 modified module (extended) - 2 new documentation files - 4 generated documentation files (to be created in Docker) - 20+ test cases - 116 placeholders inventoried Next Steps: 1. Run in Docker: python /app/generate_placeholder_catalog.py 2. 
Test extended export endpoint 3. Verify all 116 placeholders have complete metadata Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
531 lines
18 KiB
Python
531 lines
18 KiB
Python
"""
|
|
Placeholder Catalog Generator
|
|
|
|
Generates comprehensive documentation for all placeholders:
|
|
1. PLACEHOLDER_CATALOG_EXTENDED.json - Machine-readable full metadata
|
|
2. PLACEHOLDER_CATALOG_EXTENDED.md - Human-readable catalog
|
|
3. PLACEHOLDER_GAP_REPORT.md - Technical gaps and issues
|
|
4. PLACEHOLDER_EXPORT_SPEC.md - Export format specification
|
|
|
|
This implements the normative standard for placeholder documentation.
|
|
"""
|
|
import sys
|
|
import json
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import Dict, List, Any
|
|
|
|
# Add backend to path
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
from placeholder_metadata import (
|
|
PlaceholderMetadata,
|
|
PlaceholderType,
|
|
TimeWindow,
|
|
OutputType,
|
|
METADATA_REGISTRY
|
|
)
|
|
from placeholder_metadata_extractor import build_complete_metadata_registry
|
|
from generate_complete_metadata import apply_manual_corrections, generate_gap_report
|
|
|
|
|
|
# ── 1. JSON Catalog ───────────────────────────────────────────────────────────
|
|
|
|
def generate_json_catalog(registry, output_dir: Path):
    """Write PLACEHOLDER_CATALOG_EXTENDED.json (machine-readable catalog).

    Serializes every registered placeholder's metadata, sorted by key,
    under a schema/version header, and returns the path of the written file.
    """
    all_metadata = registry.get_all()

    catalog = {
        "schema_version": "1.0.0",
        "generated_at": datetime.now().isoformat(),
        "normative_standard": "PLACEHOLDER_METADATA_REQUIREMENTS_V2_NORMATIVE.md",
        "total_placeholders": len(all_metadata),
        # Sorted insertion keeps the JSON output deterministic across runs.
        "placeholders": {
            key: meta.to_dict() for key, meta in sorted(all_metadata.items())
        },
    }

    output_path = output_dir / "PLACEHOLDER_CATALOG_EXTENDED.json"
    output_path.write_text(
        json.dumps(catalog, indent=2, ensure_ascii=False), encoding="utf-8"
    )

    print(f"Generated: {output_path}")
    return output_path
|
|
|
|
|
|
# ── 2. Markdown Catalog ───────────────────────────────────────────────────────
|
|
|
|
def _catalog_header(total: int) -> List[str]:
    """Title block for the markdown catalog (generated timestamp, totals)."""
    return [
        "# Placeholder Catalog (Extended)",
        "",
        f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"**Total Placeholders:** {total}",
        "**Normative Standard:** PLACEHOLDER_METADATA_REQUIREMENTS_V2_NORMATIVE.md",
        "",
        "---",
        "",
    ]


def _catalog_summary(all_metadata: Dict, by_category: Dict) -> List[str]:
    """Summary-statistics section: counts by placeholder type and category."""
    by_type: Dict[str, int] = {}
    for metadata in all_metadata.values():
        ptype = metadata.type.value
        by_type[ptype] = by_type.get(ptype, 0) + 1

    lines = [
        "## Summary Statistics",
        "",
        "### By Type",
        "",
        "| Type | Count | Percentage |",
        "|------|-------|------------|",
    ]
    # by_type is only populated when all_metadata is non-empty, so the
    # division below cannot hit a zero denominator.
    for ptype, count in sorted(by_type.items()):
        pct = count / len(all_metadata) * 100
        lines.append(f"| {ptype} | {count} | {pct:.1f}% |")
    lines.append("")

    lines += ["### By Category", "", "| Category | Count |", "|----------|-------|"]
    for category, metadata_list in sorted(by_category.items()):
        lines.append(f"| {category} | {len(metadata_list)} |")
    lines += ["", "---", ""]
    return lines


def _placeholder_entry(metadata) -> List[str]:
    """Full markdown entry for a single placeholder (table, source, notes)."""
    lines = [
        f"#### `{{{{{metadata.key}}}}}`",
        "",
        f"**Description:** {metadata.description}",
        "",
        f"**Semantic Contract:** {metadata.semantic_contract}",
        "",
        "| Property | Value |",
        "|----------|-------|",
        f"| Type | `{metadata.type.value}` |",
        f"| Time Window | `{metadata.time_window.value}` |",
        f"| Output Type | `{metadata.output_type.value}` |",
        f"| Unit | {metadata.unit or 'None'} |",
        f"| Format Hint | {metadata.format_hint or 'None'} |",
        f"| Version | {metadata.version} |",
        f"| Deprecated | {metadata.deprecated} |",
        "",
        "**Source:**",
        f"- Resolver: `{metadata.source.resolver}`",
        f"- Module: `{metadata.source.module}`",
    ]
    # Optional source details are emitted only when present.
    if metadata.source.function:
        lines.append(f"- Function: `{metadata.source.function}`")
    if metadata.source.data_layer_module:
        lines.append(f"- Data Layer: `{metadata.source.data_layer_module}`")
    if metadata.source.source_tables:
        tables = ", ".join(f"`{t}`" for t in metadata.source.source_tables)
        lines.append(f"- Tables: {tables}")
    lines.append("")

    if metadata.known_issues:
        lines.append("**Known Issues:**")
        lines.extend(f"- {issue}" for issue in metadata.known_issues)
        lines.append("")

    if metadata.notes:
        lines.append("**Notes:**")
        lines.extend(f"- {note}" for note in metadata.notes)
        lines.append("")

    lines += ["---", ""]
    return lines


def generate_markdown_catalog(registry, output_dir: Path):
    """Write PLACEHOLDER_CATALOG_EXTENDED.md (human-readable catalog).

    Emits a header, summary statistics (by type / by category), and one
    detailed entry per placeholder, grouped by sorted category and sorted
    by key within each category. Returns the path of the written file.
    """
    all_metadata = registry.get_all()
    by_category = registry.get_by_category()

    md: List[str] = []
    md += _catalog_header(len(all_metadata))
    md += _catalog_summary(all_metadata, by_category)

    md += ["## Detailed Placeholder Catalog", ""]
    for category, metadata_list in sorted(by_category.items()):
        md.append(f"### {category} ({len(metadata_list)} placeholders)")
        md.append("")
        for metadata in sorted(metadata_list, key=lambda m: m.key):
            md += _placeholder_entry(metadata)

    output_path = output_dir / "PLACEHOLDER_CATALOG_EXTENDED.md"
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(md))

    print(f"Generated: {output_path}")
    return output_path
|
|
|
|
|
|
# ── 3. Gap Report ─────────────────────────────────────────────────────────────
|
|
|
|
def generate_gap_report_md(registry, gaps: Dict, output_dir: Path):
    """Write PLACEHOLDER_GAP_REPORT.md (unresolved-metadata report).

    Args:
        registry: Metadata registry exposing ``get_all()`` and ``get(key)``.
        gaps: Mapping of gap-type name -> list of affected placeholder keys.
        output_dir: Directory the report is written into.

    Returns the path of the written file.
    """
    all_metadata = registry.get_all()
    total = len(all_metadata)

    md = [
        "# Placeholder Metadata Gap Report",
        "",
        f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"**Total Placeholders:** {total}",
        "",
        "This report identifies placeholders with incomplete or unresolved metadata fields.",
        "",
        "---",
        "",
    ]

    # Summary: coverage is measured against 6 tracked gap types per
    # placeholder. Guard the division so an empty registry does not crash
    # with ZeroDivisionError (report 0% coverage instead).
    gap_count = sum(len(v) for v in gaps.values())
    coverage = (1 - gap_count / (total * 6)) * 100 if total else 0.0

    md += [
        "## Summary",
        "",
        f"- **Total Gap Instances:** {gap_count}",
        f"- **Metadata Coverage:** {coverage:.1f}%",
        "",
        "## Detailed Gap Analysis",
        "",
    ]

    for gap_type, placeholders in sorted(gaps.items()):
        if not placeholders:
            # Gap types with no affected placeholders are omitted entirely.
            continue

        md += [
            f"### {gap_type.replace('_', ' ').title()}",
            "",
            f"**Count:** {len(placeholders)}",
            "",
        ]

        # Group affected placeholders by their category for readability.
        by_cat: Dict[str, list] = {}
        for key in placeholders:
            metadata = registry.get(key)
            if metadata:
                by_cat.setdefault(metadata.category, []).append(key)

        for category, keys in sorted(by_cat.items()):
            md += [f"#### {category}", ""]
            md += [f"- `{{{{{key}}}}}`" for key in sorted(keys)]
            md.append("")

    # Recommendations for the gap types we know how to resolve.
    md += ["---", "", "## Recommendations", ""]

    if gaps.get('unknown_time_window'):
        md += [
            "### Time Window Resolution",
            "",
            "Placeholders with unknown time windows should be analyzed to determine:",
            "- Whether they use `latest`, `7d`, `28d`, `30d`, `90d`, or `custom`",
            "- Document in semantic_contract if time window is variable",
            "",
        ]

    if gaps.get('legacy_unknown_type'):
        md += [
            "### Type Classification",
            "",
            "Placeholders with `legacy_unknown` type should be classified as:",
            "- `atomic` - Single atomic value",
            "- `raw_data` - Structured raw data (JSON, lists)",
            "- `interpreted` - AI-interpreted or derived values",
            "",
        ]

    if gaps.get('missing_data_layer_module'):
        md += [
            "### Data Layer Tracking",
            "",
            "Placeholders without data_layer_module should be investigated:",
            "- Check if they call data_layer functions",
            "- Document direct database access if no data_layer function exists",
            "",
        ]

    output_path = output_dir / "PLACEHOLDER_GAP_REPORT.md"
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(md))

    print(f"Generated: {output_path}")
    return output_path
|
|
|
|
|
|
# ── 4. Export Spec ────────────────────────────────────────────────────────────
|
|
|
|
def generate_export_spec_md(output_dir: Path):
    """Write PLACEHOLDER_EXPORT_SPEC.md (export-format specification).

    The document is static apart from the generation timestamp, so it is
    kept as one literal list of lines instead of hundreds of append calls.
    Returns the path of the written file.
    """
    generated = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    lines = [
        "# Placeholder Export Specification",
        "",
        "**Version:** 1.0.0",
        f"**Generated:** {generated}",
        "**Normative Standard:** PLACEHOLDER_METADATA_REQUIREMENTS_V2_NORMATIVE.md",
        "",
        "---",
        "",
        "## Overview",
        "",
        "The Placeholder Export API provides two endpoints:",
        "",
        "1. **Legacy Export** (`/api/prompts/placeholders/export-values`)",
        "   - Backward-compatible format",
        "   - Simple key-value pairs",
        "   - Organized by category",
        "",
        "2. **Extended Export** (`/api/prompts/placeholders/export-values-extended`)",
        "   - Complete normative metadata",
        "   - Runtime value resolution",
        "   - Gap analysis",
        "   - Validation results",
        "",
        "## Extended Export Format",
        "",
        "### Root Structure",
        "",
        "```json",
        "{",
        '  "schema_version": "1.0.0",',
        '  "export_date": "2026-03-29T12:00:00Z",',
        '  "profile_id": "user-123",',
        '  "legacy": { ... },',
        '  "metadata": { ... },',
        '  "validation": { ... }',
        "}",
        "```",
        "",
        "### Legacy Section",
        "",
        "Maintains backward compatibility with existing export consumers.",
        "",
        "```json",
        '"legacy": {',
        '  "all_placeholders": {',
        '    "weight_aktuell": "85.8 kg",',
        '    "name": "Max Mustermann",',
        "    ...",
        "  },",
        '  "placeholders_by_category": {',
        '    "Körper": [',
        "      {",
        '        "key": "{{weight_aktuell}}",',
        '        "description": "Aktuelles Gewicht in kg",',
        '        "value": "85.8 kg",',
        '        "example": "85.8 kg"',
        "      },",
        "      ...",
        "    ],",
        "    ...",
        "  },",
        '  "count": 116',
        "}",
        "```",
        "",
        "### Metadata Section",
        "",
        "Complete normative metadata for all placeholders.",
        "",
        "```json",
        '"metadata": {',
        '  "flat": [',
        "    {",
        '      "key": "weight_aktuell",',
        '      "placeholder": "{{weight_aktuell}}",',
        '      "category": "Körper",',
        '      "type": "atomic",',
        '      "description": "Aktuelles Gewicht in kg",',
        '      "semantic_contract": "Letzter verfügbarer Gewichtseintrag...",',
        '      "unit": "kg",',
        '      "time_window": "latest",',
        '      "output_type": "number",',
        '      "format_hint": "85.8 kg",',
        '      "value_display": "85.8 kg",',
        '      "value_raw": 85.8,',
        '      "available": true,',
        '      "source": {',
        '        "resolver": "get_latest_weight",',
        '        "module": "placeholder_resolver.py",',
        '        "function": "get_latest_weight_data",',
        '        "data_layer_module": "body_metrics",',
        '        "source_tables": ["weight_log"]',
        "      },",
        "      ...",
        "    },",
        "    ...",
        "  ],",
        '  "by_category": { ... },',
        '  "summary": {',
        '    "total_placeholders": 116,',
        '    "available": 98,',
        '    "missing": 18,',
        '    "by_type": {',
        '      "atomic": 85,',
        '      "interpreted": 20,',
        '      "raw_data": 8,',
        '      "legacy_unknown": 3',
        "    },",
        '    "coverage": {',
        '      "fully_resolved": 75,',
        '      "partially_resolved": 30,',
        '      "unresolved": 11',
        "    }",
        "  },",
        '  "gaps": {',
        '    "unknown_time_window": ["placeholder1", ...],',
        '    "missing_semantic_contract": [...],',
        "    ...",
        "  }",
        "}",
        "```",
        "",
        "### Validation Section",
        "",
        "Results of normative standard validation.",
        "",
        "```json",
        '"validation": {',
        '  "compliant": 89,',
        '  "non_compliant": 27,',
        '  "issues": [',
        "    {",
        '      "placeholder": "activity_summary",',
        '      "violations": [',
        "        {",
        '          "field": "time_window",',
        '          "issue": "Time window UNKNOWN should be resolved",',
        '          "severity": "warning"',
        "        }",
        "      ]",
        "    },",
        "    ...",
        "  ]",
        "}",
        "```",
        "",
        "## API Usage",
        "",
        "### Legacy Export",
        "",
        "```bash",
        "GET /api/prompts/placeholders/export-values",
        "Header: X-Auth-Token: <token>",
        "```",
        "",
        "### Extended Export",
        "",
        "```bash",
        "GET /api/prompts/placeholders/export-values-extended",
        "Header: X-Auth-Token: <token>",
        "```",
        "",
        "## Standards Compliance",
        "",
        "The extended export implements the following normative requirements:",
        "",
        "1. **Non-Breaking:** Legacy export remains unchanged",
        "2. **Complete Metadata:** All fields from normative standard",
        "3. **Runtime Resolution:** Values resolved for current profile",
        "4. **Gap Transparency:** Unresolved fields explicitly marked",
        "5. **Validation:** Automated compliance checking",
        "6. **Versioning:** Schema version for future evolution",
        "",
    ]

    output_path = output_dir / "PLACEHOLDER_EXPORT_SPEC.md"
    output_path.write_text("\n".join(lines), encoding="utf-8")

    print(f"Generated: {output_path}")
    return output_path
|
|
|
|
|
|
# ── Main ──────────────────────────────────────────────────────────────────────
|
|
|
|
def main():
    """Build the metadata registry and emit all four catalog documents.

    Returns 0 on success; on any failure prints the error plus a full
    traceback and returns 1 (the exit status for the shell).
    """
    banner = "=" * 60
    print(banner)
    print("PLACEHOLDER CATALOG GENERATOR")
    print(banner)
    print()

    # All generated files land in <repo>/docs next to this script's parent.
    docs_dir = Path(__file__).parent.parent / "docs"
    docs_dir.mkdir(parents=True, exist_ok=True)
    print(f"Output directory: {docs_dir}")
    print()

    try:
        print("Building metadata registry...")
        registry = apply_manual_corrections(build_complete_metadata_registry())
        print(f"Loaded {registry.count()} placeholders")
        print()

        print("Analyzing gaps...")
        gaps = generate_gap_report(registry)
        print()

        print("Generating documentation files...")
        print()
        generate_json_catalog(registry, docs_dir)
        generate_markdown_catalog(registry, docs_dir)
        generate_gap_report_md(registry, gaps, docs_dir)
        generate_export_spec_md(docs_dir)

        print()
        print(banner)
        print("CATALOG GENERATION COMPLETE")
        print(banner)
        print()
        print("Generated files:")
        outputs = (
            "PLACEHOLDER_CATALOG_EXTENDED.json",
            "PLACEHOLDER_CATALOG_EXTENDED.md",
            "PLACEHOLDER_GAP_REPORT.md",
            "PLACEHOLDER_EXPORT_SPEC.md",
        )
        for index, filename in enumerate(outputs, start=1):
            print(f"  {index}. {docs_dir}/{filename}")
        print()

        return 0

    except Exception as exc:
        # Top-level boundary: report the failure and signal it via exit code.
        print()
        print(f"ERROR: {exc}")
        import traceback
        traceback.print_exc()
        return 1
|
|
|
|
|
|
# Script entry point: propagate main()'s integer status code to the shell.
if __name__ == "__main__":
    sys.exit(main())
|