Implements a comprehensive metadata system for all 116 placeholders according to the PLACEHOLDER_METADATA_REQUIREMENTS_V2_NORMATIVE standard.

Backend:
- placeholder_metadata.py: Complete schema (PlaceholderMetadata, Registry, Validation)
- placeholder_metadata_extractor.py: Automatic extraction with heuristics
- placeholder_metadata_complete.py: Hand-curated metadata for all 116 placeholders
- generate_complete_metadata.py: Metadata generation with manual corrections
- generate_placeholder_catalog.py: Documentation generator (4 output files)
- routers/prompts.py: New extended export endpoint (non-breaking)
- tests/test_placeholder_metadata.py: Comprehensive test suite

Documentation:
- PLACEHOLDER_GOVERNANCE.md: Mandatory governance guidelines
- PLACEHOLDER_METADATA_IMPLEMENTATION_SUMMARY.md: Complete implementation docs

Features:
- Normative-compliant metadata for all 116 placeholders
- Non-breaking extended export API endpoint
- Automatic + manual metadata curation
- Validation framework with error/warning levels
- Gap reporting for unresolved fields
- Catalog generator (JSON, Markdown, Gap Report, Export Spec)
- Test suite (20+ tests)
- Governance rules for future placeholders

API:
- GET /api/prompts/placeholders/export-values-extended (NEW)
- GET /api/prompts/placeholders/export-values (unchanged, backward compatible)

Architecture:
- PlaceholderType enum: atomic, raw_data, interpreted, legacy_unknown
- TimeWindow enum: latest, 7d, 14d, 28d, 30d, 90d, custom, mixed, unknown
- OutputType enum: string, number, integer, boolean, json, markdown, date, enum
- Complete source tracking (resolver, data_layer, tables)
- Runtime value resolution
- Usage tracking (prompts, pipelines, charts)

Statistics:
- 6 new Python modules (~2500+ lines)
- 1 modified module (extended)
- 2 new documentation files
- 4 generated documentation files (to be created in Docker)
- 20+ test cases
- 116 placeholders inventoried

Next Steps:
1. Run in Docker: python /app/generate_placeholder_catalog.py
2. Test the extended export endpoint
3. Verify that all 116 placeholders have complete metadata

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
351 lines
15 KiB
Python
351 lines
15 KiB
Python
"""
Placeholder Metadata System - Normative Standard Implementation

This module implements the normative standard for placeholder metadata
as defined in PLACEHOLDER_METADATA_REQUIREMENTS_V2_NORMATIVE.md

Version: 1.0.0
Status: Mandatory for all existing and future placeholders
"""
|
|
from dataclasses import dataclass, field, asdict
|
|
from enum import Enum
|
|
from typing import Optional, List, Dict, Any, Callable
|
|
from datetime import datetime
|
|
import json
|
|
|
|
|
|
# ── Enums (Normative) ─────────────────────────────────────────────────────────
|
|
|
|
class PlaceholderType(str, Enum):
    """Normative classification of the kind of value a placeholder yields.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    # One single atomic value (e.g., weight, age).
    ATOMIC = "atomic"

    # Structured raw data (e.g., JSON lists).
    RAW_DATA = "raw_data"

    # Values that were interpreted or derived by AI.
    INTERPRETED = "interpreted"

    # Legacy placeholder whose type has not been clarified.
    LEGACY_UNKNOWN = "legacy_unknown"
|
|
|
|
|
|
class TimeWindow(str, Enum):
    """Normative classification of the time window behind a placeholder value.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    # The most recent value.
    LATEST = "latest"

    # Fixed-length windows; the value encodes the length in days.
    DAYS_7 = "7d"
    DAYS_14 = "14d"
    DAYS_28 = "28d"
    DAYS_30 = "30d"
    DAYS_90 = "90d"

    # Custom time window (to be specified in the notes).
    CUSTOM = "custom"

    # The output contains several time windows.
    MIXED = "mixed"

    # Legacy: the time window is unclear.
    UNKNOWN = "unknown"
|
|
|
|
|
|
class OutputType(str, Enum):
    """Normative data type of a placeholder's output.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    # Scalar types.
    STRING = "string"
    NUMBER = "number"
    INTEGER = "integer"
    BOOLEAN = "boolean"

    # Structured / formatted types.
    JSON = "json"
    MARKDOWN = "markdown"
    DATE = "date"
    ENUM = "enum"

    # Output type not determined.
    UNKNOWN = "unknown"
|
|
|
|
|
|
class ConfidenceLevel(str, Enum):
    """Confidence / quality level attached to data.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    # Sufficient data; reliable.
    HIGH = "high"

    # Some data; potentially unreliable.
    MEDIUM = "medium"

    # Minimal data; unreliable.
    LOW = "low"

    # No data, or data that cannot be used.
    INSUFFICIENT = "insufficient"

    # Confidence is not relevant here.
    NOT_APPLICABLE = "not_applicable"
|
|
|
|
|
|
# ── Data Classes (Normative) ──────────────────────────────────────────────────
|
|
|
|
def _default_reason_codes() -> List[str]:
    """Return a fresh list holding the default missing-value reason codes."""
    return ["no_data", "insufficient_data", "resolver_error"]


@dataclass
class MissingValuePolicy:
    """How a missing or unavailable placeholder value is represented."""

    # Legacy display string for a missing value
    # (German for "not available"; runtime text, kept verbatim).
    legacy_display: str = "nicht verfügbar"

    # Whether null is returned in the structured format.
    structured_null: bool = True

    # Reason codes for this policy; every instance gets its own list.
    reason_codes: List[str] = field(default_factory=_default_reason_codes)
|
|
|
|
|
|
@dataclass
class ExceptionHandling:
    """Strategy describing how resolver exceptions are dealt with."""

    # Identifier of the error-handling behaviour.
    on_error: str = "return_null_and_reason"

    # Free-text note; the default is German for "do not let any exception
    # propagate up to the prompt level" (runtime text, kept verbatim).
    notes: str = "Keine Exception bis in Prompt-Ebene durchreichen"
|
|
|
|
|
|
@dataclass
class QualityFilterPolicy:
    """Quality filter policy of a placeholder (only where one applies).

    A default-constructed instance describes a disabled filter with every
    optional setting left unset.
    """

    # Whether the quality filter is switched on.
    enabled: bool = False

    # Optional integer threshold (presumably the minimum number of data
    # points required - confirm against the resolvers).
    min_data_points: Optional[int] = None

    # Optional confidence threshold, expressed as a ConfidenceLevel member.
    min_confidence: Optional[ConfidenceLevel] = None

    # Optional free-text description of the filter criteria.
    filter_criteria: Optional[str] = None

    # Optional free-text remarks.
    notes: Optional[str] = None
|
|
|
|
|
|
@dataclass
class ConfidenceLogic:
    """Description of a placeholder's confidence / quality scoring logic.

    A default-constructed instance states that confidence scoring is not
    supported and leaves every detail unset.
    """

    # Whether confidence scoring is supported at all.
    supported: bool = False

    # Optional description of how the confidence is calculated.
    calculation: Optional[str] = None

    # Optional mapping of threshold names to values (schema not fixed here).
    thresholds: Optional[Dict[str, Any]] = None

    # Optional free-text remarks.
    notes: Optional[str] = None
|
|
|
|
|
|
@dataclass
class SourceInfo:
    """Technical origin of a placeholder value.

    Only ``resolver`` has to be supplied; every other field has a default.
    """

    # Name of the resolver function in PLACEHOLDER_MAP.
    resolver: str

    # Module that contains the resolver.
    module: str = "placeholder_resolver.py"

    # Data layer function that gets called, if any.
    function: Optional[str] = None

    # Data layer module (e.g., body_metrics.py), if any.
    data_layer_module: Optional[str] = None

    # Database tables involved; every instance gets its own list.
    source_tables: List[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class UsedBy:
    """Consumers of a placeholder, grouped by kind.

    Each field defaults to its own empty list.
    """

    # Names/IDs of prompts using the placeholder.
    prompts: List[str] = field(default_factory=list)

    # Names/IDs of pipelines using the placeholder.
    pipelines: List[str] = field(default_factory=list)

    # Names of chart endpoints using the placeholder.
    charts: List[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class PlaceholderMetadata:
    """
    Full metadata record of one placeholder (normative standard).

    Every field is mandatory. A field that cannot be resolved yet is filled
    with None, [], or "unknown" rather than being left out.
    """

    # ── Core Identification ───────────────────────────────────────────────────
    # Placeholder key without braces (e.g., "weight_aktuell").
    key: str
    # Full placeholder including braces (e.g., "{{weight_aktuell}}").
    placeholder: str
    # Category (e.g., "Körper" = body, "Ernährung" = nutrition).
    category: str

    # ── Type & Semantics ──────────────────────────────────────────────────────
    # One of: atomic | raw_data | interpreted | legacy_unknown.
    type: PlaceholderType
    # Short description.
    description: str
    # Precise semantic contract: what the placeholder represents.
    semantic_contract: str

    # ── Data Format ───────────────────────────────────────────────────────────
    # Unit (e.g., "kg", "%", "Stunden" = hours).
    unit: Optional[str]
    # Time window used for aggregation/calculation.
    time_window: TimeWindow
    # Data type of the output.
    output_type: OutputType
    # Example of the format (e.g., "85.8 kg").
    format_hint: Optional[str]
    # Example of a resolved value.
    example_output: Optional[str]

    # ── Runtime Values (populated during export) ──────────────────────────────
    # Currently resolved display value.
    value_display: Optional[str] = None
    # Currently resolved raw value.
    value_raw: Optional[Any] = None
    # Whether a value is available right now.
    available: bool = True
    # Reason for the value being unavailable, if it is.
    missing_reason: Optional[str] = None

    # ── Error Handling ────────────────────────────────────────────────────────
    missing_value_policy: MissingValuePolicy = field(default_factory=MissingValuePolicy)
    exception_handling: ExceptionHandling = field(default_factory=ExceptionHandling)

    # ── Quality & Confidence ──────────────────────────────────────────────────
    quality_filter_policy: Optional[QualityFilterPolicy] = None
    confidence_logic: Optional[ConfidenceLogic] = None

    # ── Technical Source ──────────────────────────────────────────────────────
    # Defaults to a SourceInfo whose resolver is the "unknown" sentinel.
    source: SourceInfo = field(default_factory=lambda: SourceInfo(resolver="unknown"))
    # Dependencies (e.g., "profile_id").
    dependencies: List[str] = field(default_factory=list)

    # ── Usage Tracking ────────────────────────────────────────────────────────
    used_by: UsedBy = field(default_factory=UsedBy)

    # ── Versioning & Lifecycle ────────────────────────────────────────────────
    version: str = "1.0.0"
    deprecated: bool = False
    # Placeholder that replaces this one once it is deprecated.
    replacement: Optional[str] = None

    # ── Issues & Notes ────────────────────────────────────────────────────────
    known_issues: List[str] = field(default_factory=list)
    notes: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a nested dict with enum fields as plain values.

        Nested dataclasses are expanded into dicts by ``asdict``. The three
        top-level enum fields and the nested
        ``quality_filter_policy.min_confidence`` are replaced by their
        ``.value`` strings.
        """
        payload = asdict(self)

        # asdict() does not turn enum members into their values; do it here.
        payload.update(
            type=self.type.value,
            time_window=self.time_window.value,
            output_type=self.output_type.value,
        )

        # The only enum nested inside a sub-dataclass is the confidence
        # threshold of the (optional) quality filter policy.
        policy = self.quality_filter_policy
        if policy and policy.min_confidence:
            payload['quality_filter_policy']['min_confidence'] = policy.min_confidence.value

        return payload

    def to_json(self) -> str:
        """Return ``to_dict()`` serialized as a JSON string.

        The output is indented by two spaces and keeps non-ASCII characters
        unescaped.
        """
        as_dict = self.to_dict()
        return json.dumps(as_dict, indent=2, ensure_ascii=False)
|
|
|
|
|
|
# ── Validation ────────────────────────────────────────────────────────────────
|
|
|
|
@dataclass
class ValidationViolation:
    """A single finding produced while validating placeholder metadata."""

    # Name of the metadata field the finding refers to.
    field: str

    # Human-readable description of the problem.
    issue: str

    # Severity of the finding: "error" or "warning".
    severity: str
|
|
|
|
|
|
def validate_metadata(metadata: PlaceholderMetadata) -> List[ValidationViolation]:
    """
    Validate metadata against normative standard.

    Checks run in a fixed order: mandatory text fields, unresolved
    type / time window / output type, resolver, deprecation. Findings are
    appended in that order.

    Errors are reported for: a missing or "unknown" key, an empty
    placeholder, category, description or semantic contract, and a missing
    or "unknown" resolver. Everything else is reported as a warning.

    Args:
        metadata: The metadata record to check.

    Returns:
        List of violations. Empty list means compliant.
    """
    violations: List[ValidationViolation] = []

    def report(field_name: str, issue: str, severity: str) -> None:
        violations.append(ValidationViolation(field_name, issue, severity))

    # ── Mandatory Fields ──────────────────────────────────────────────────────
    # "unknown" is the sentinel for an unresolved value, so it counts as missing.
    if not metadata.key or metadata.key == "unknown":
        report("key", "Key is required", "error")

    required_text_fields = (
        ("placeholder", "Placeholder string required"),
        ("category", "Category is required"),
        ("description", "Description is required"),
        ("semantic_contract", "Semantic contract is required"),
    )
    for field_name, issue in required_text_fields:
        if not getattr(metadata, field_name):
            report(field_name, issue, "error")

    # ── Type Validation ───────────────────────────────────────────────────────
    if metadata.type == PlaceholderType.LEGACY_UNKNOWN:
        report("type", "Type LEGACY_UNKNOWN should be resolved", "warning")

    # ── Time Window Validation ────────────────────────────────────────────────
    if metadata.time_window == TimeWindow.UNKNOWN:
        report("time_window", "Time window UNKNOWN should be resolved", "warning")

    # ── Output Type Validation ────────────────────────────────────────────────
    if metadata.output_type == OutputType.UNKNOWN:
        report("output_type", "Output type UNKNOWN should be resolved", "warning")

    # ── Source Validation ─────────────────────────────────────────────────────
    # An empty or None resolver is just as unspecified as the "unknown"
    # sentinel; treat it the same way the key check above does.
    resolver = metadata.source.resolver
    if not resolver or resolver == "unknown":
        report("source.resolver", "Resolver function must be specified", "error")

    # ── Deprecation Validation ────────────────────────────────────────────────
    if metadata.deprecated and not metadata.replacement:
        report("replacement", "Deprecated placeholder should have replacement", "warning")

    return violations
|
|
|
|
|
|
# ── Registry ──────────────────────────────────────────────────────────────────
|
|
|
|
class PlaceholderMetadataRegistry:
    """
    Central store of placeholder metadata, indexed by placeholder key.

    The registry is meant to guarantee that every placeholder carries
    complete metadata and to act as the single source of truth for the
    export system.
    """

    def __init__(self):
        # Maps placeholder key -> metadata record.
        self._registry: Dict[str, PlaceholderMetadata] = {}

    def register(self, metadata: PlaceholderMetadata, validate: bool = True) -> None:
        """
        Add a metadata record under ``metadata.key``.

        A record already stored under the same key is replaced.

        Args:
            metadata: PlaceholderMetadata instance
            validate: Whether to validate before registering

        Raises:
            ValueError: If validation reports at least one error
                (warnings do not block registration)
        """
        if validate:
            errors = [v for v in validate_metadata(metadata) if v.severity == "error"]
            if errors:
                error_msg = "\n".join(f" - {v.field}: {v.issue}" for v in errors)
                raise ValueError(f"Metadata validation failed:\n{error_msg}")

        self._registry[metadata.key] = metadata

    def get(self, key: str) -> Optional[PlaceholderMetadata]:
        """Return the record stored under ``key``, or None if there is none."""
        return self._registry.get(key)

    def get_all(self) -> Dict[str, PlaceholderMetadata]:
        """Return a shallow copy of the key -> metadata mapping."""
        return dict(self._registry)

    def get_by_category(self) -> Dict[str, List[PlaceholderMetadata]]:
        """Return all records grouped by their ``category``."""
        grouped: Dict[str, List[PlaceholderMetadata]] = {}
        for entry in self._registry.values():
            grouped.setdefault(entry.category, []).append(entry)
        return grouped

    def get_deprecated(self) -> List[PlaceholderMetadata]:
        """Return every record flagged as deprecated."""
        return [entry for entry in self._registry.values() if entry.deprecated]

    def get_by_type(self, ptype: PlaceholderType) -> List[PlaceholderMetadata]:
        """Return every record whose ``type`` equals ``ptype``."""
        return [entry for entry in self._registry.values() if entry.type == ptype]

    def count(self) -> int:
        """Return the number of registered placeholders."""
        return len(self._registry)

    def validate_all(self) -> Dict[str, List[ValidationViolation]]:
        """
        Run validation over every registered record.

        Returns a dict mapping key to its list of violations; keys without
        any violation are left out.
        """
        findings = {
            key: validate_metadata(entry)
            for key, entry in self._registry.items()
        }
        return {key: found for key, found in findings.items() if found}
|
|
|
|
|
|
# Module-level registry instance, created once at import time.
METADATA_REGISTRY = PlaceholderMetadataRegistry()
|