""" Placeholder Metadata System - Normative Standard Implementation This module implements the normative standard for placeholder metadata as defined in PLACEHOLDER_METADATA_REQUIREMENTS_V2_NORMATIVE.md Version: 1.0.0 Status: Mandatory for all existing and future placeholders """ from dataclasses import dataclass, field, asdict from enum import Enum from typing import Optional, List, Dict, Any, Callable from datetime import datetime import json # ── Enums (Normative) ───────────────────────────────────────────────────────── class PlaceholderType(str, Enum): """Placeholder type classification (normative).""" ATOMIC = "atomic" # Single atomic value (e.g., weight, age) RAW_DATA = "raw_data" # Structured raw data (e.g., JSON lists) INTERPRETED = "interpreted" # AI-interpreted/derived values LEGACY_UNKNOWN = "legacy_unknown" # Legacy placeholder with unclear type class TimeWindow(str, Enum): """Time window classification (normative).""" LATEST = "latest" # Most recent value DAYS_7 = "7d" # 7-day window DAYS_14 = "14d" # 14-day window DAYS_28 = "28d" # 28-day window DAYS_30 = "30d" # 30-day window DAYS_90 = "90d" # 90-day window CUSTOM = "custom" # Custom time window (specify in notes) MIXED = "mixed" # Multiple time windows in output UNKNOWN = "unknown" # Time window unclear (legacy) class OutputType(str, Enum): """Output data type (normative).""" STRING = "string" NUMBER = "number" INTEGER = "integer" BOOLEAN = "boolean" JSON = "json" MARKDOWN = "markdown" DATE = "date" ENUM = "enum" UNKNOWN = "unknown" class ConfidenceLevel(str, Enum): """Data confidence/quality level.""" HIGH = "high" # Sufficient data, reliable MEDIUM = "medium" # Some data, potentially unreliable LOW = "low" # Minimal data, unreliable INSUFFICIENT = "insufficient" # No data or unusable NOT_APPLICABLE = "not_applicable" # Confidence not relevant # ── Data Classes (Normative) ────────────────────────────────────────────────── @dataclass class MissingValuePolicy: """Policy for handling 
missing/unavailable values.""" legacy_display: str = "nicht verfügbar" # Legacy string for missing values structured_null: bool = True # Return null in structured format reason_codes: List[str] = field(default_factory=lambda: [ "no_data", "insufficient_data", "resolver_error" ]) @dataclass class ExceptionHandling: """Exception handling strategy.""" on_error: str = "return_null_and_reason" # How to handle errors notes: str = "Keine Exception bis in Prompt-Ebene durchreichen" @dataclass class QualityFilterPolicy: """Quality filter policy (if applicable).""" enabled: bool = False min_data_points: Optional[int] = None min_confidence: Optional[ConfidenceLevel] = None filter_criteria: Optional[str] = None default_filter_level: Optional[str] = None # e.g., "quality", "acceptable", "all" null_quality_handling: Optional[str] = None # e.g., "exclude", "include_as_uncategorized" includes_poor: bool = False # Whether poor quality data is included includes_excluded: bool = False # Whether excluded data is included notes: Optional[str] = None @dataclass class ConfidenceLogic: """Confidence/quality scoring logic.""" supported: bool = False calculation: Optional[str] = None # How confidence is calculated thresholds: Optional[Dict[str, Any]] = None notes: Optional[str] = None @dataclass class SourceInfo: """Technical source information.""" resolver: str # Resolver function name in PLACEHOLDER_MAP module: str = "placeholder_resolver.py" # Module containing resolver function: Optional[str] = None # Data layer function called data_layer_module: Optional[str] = None # Data layer module (e.g., body_metrics.py) source_tables: List[str] = field(default_factory=list) # Database tables source_kind: str = "computed" # direct | computed | aggregated | derived | interpreted code_reference: Optional[str] = None # Line reference (e.g., "placeholder_resolver.py:1083") @dataclass class UsedBy: """Where the placeholder is used.""" prompts: List[str] = field(default_factory=list) # Prompt names/IDs 
pipelines: List[str] = field(default_factory=list) # Pipeline names/IDs charts: List[str] = field(default_factory=list) # Chart endpoint names @dataclass class PlaceholderMetadata: """ Complete metadata for a placeholder (normative standard). All fields are mandatory. Use None, [], or "unknown" for unresolved fields. """ # ── Core Identification ─────────────────────────────────────────────────── key: str # Placeholder key without braces (e.g., "weight_aktuell") placeholder: str # Full placeholder with braces (e.g., "{{weight_aktuell}}") category: str # Category (e.g., "Körper", "Ernährung") # ── Type & Semantics ────────────────────────────────────────────────────── type: PlaceholderType # atomic | raw_data | interpreted | legacy_unknown description: str # Short description semantic_contract: str # Precise semantic contract (what it represents) # ── Data Format ─────────────────────────────────────────────────────────── unit: Optional[str] # Unit (e.g., "kg", "%", "Stunden") time_window: TimeWindow # Time window for aggregation/calculation output_type: OutputType # Data type of output format_hint: Optional[str] # Example format (e.g., "85.8 kg") example_output: Optional[str] # Example resolved value # ── Runtime Values (populated during export) ────────────────────────────── value_display: Optional[str] = None # Current resolved display value value_raw: Optional[Any] = None # Current resolved raw value available: bool = True # Whether value is currently available missing_reason: Optional[str] = None # Reason if unavailable # ── Error Handling ──────────────────────────────────────────────────────── missing_value_policy: MissingValuePolicy = field(default_factory=MissingValuePolicy) exception_handling: ExceptionHandling = field(default_factory=ExceptionHandling) # ── Quality & Confidence ────────────────────────────────────────────────── quality_filter_policy: Optional[QualityFilterPolicy] = None confidence_logic: Optional[ConfidenceLogic] = None # ── Technical 
Source ────────────────────────────────────────────────────── source: SourceInfo = field(default_factory=lambda: SourceInfo(resolver="unknown")) dependencies: List[str] = field(default_factory=list) # Dependencies (e.g., "profile_id") # ── Usage Tracking ──────────────────────────────────────────────────────── used_by: UsedBy = field(default_factory=UsedBy) # ── Versioning & Lifecycle ──────────────────────────────────────────────── version: str = "1.0.0" deprecated: bool = False replacement: Optional[str] = None # Replacement placeholder if deprecated # ── Issues & Notes ──────────────────────────────────────────────────────── known_issues: List[str] = field(default_factory=list) notes: List[str] = field(default_factory=list) # ── Quality Assurance (Extended) ────────────────────────────────────────── schema_status: str = "draft" # draft | validated | production provenance_confidence: str = "medium" # low | medium | high contract_source: str = "inferred" # inferred | documented | validated legacy_contract_mismatch: bool = False # True if legacy description != implementation metadata_completeness_score: int = 0 # 0-100, calculated orphaned_placeholder: bool = False # True if not used in any prompt/pipeline/chart unresolved_fields: List[str] = field(default_factory=list) # Fields that couldn't be resolved def to_dict(self) -> Dict[str, Any]: """Convert to dictionary with enum handling.""" result = asdict(self) # Convert enums to strings result['type'] = self.type.value result['time_window'] = self.time_window.value result['output_type'] = self.output_type.value # Handle nested confidence level enums if self.quality_filter_policy and self.quality_filter_policy.min_confidence: result['quality_filter_policy']['min_confidence'] = \ self.quality_filter_policy.min_confidence.value return result def to_json(self) -> str: """Convert to JSON string.""" return json.dumps(self.to_dict(), indent=2, ensure_ascii=False) # ── Validation 
# ── Validation ────────────────────────────────────────────────────────────────

@dataclass
class ValidationViolation:
    """Represents a validation violation."""
    field: str     # Name of the offending field (dotted for nested fields)
    issue: str     # Human-readable description of the problem
    severity: str  # error | warning


def validate_metadata(metadata: PlaceholderMetadata) -> List[ValidationViolation]:
    """
    Validate metadata against normative standard.

    Missing mandatory text fields and a missing resolver are reported with
    severity "error"; unresolved enum placeholders (LEGACY_UNKNOWN / UNKNOWN)
    and a deprecated entry without replacement are reported as "warning".

    Args:
        metadata: The placeholder metadata to check.

    Returns:
        List of violations. Empty list means compliant.
    """
    violations: List[ValidationViolation] = []

    # ── Mandatory Fields ──────────────────────────────────────────────────────
    if not metadata.key or metadata.key == "unknown":
        violations.append(ValidationViolation("key", "Key is required", "error"))

    if not metadata.placeholder:
        violations.append(ValidationViolation(
            "placeholder", "Placeholder string required", "error"
        ))

    if not metadata.category:
        violations.append(ValidationViolation("category", "Category is required", "error"))

    if not metadata.description:
        violations.append(ValidationViolation(
            "description", "Description is required", "error"
        ))

    if not metadata.semantic_contract:
        violations.append(ValidationViolation(
            "semantic_contract", "Semantic contract is required", "error"
        ))

    # ── Type Validation ───────────────────────────────────────────────────────
    if metadata.type == PlaceholderType.LEGACY_UNKNOWN:
        violations.append(ValidationViolation(
            "type", "Type LEGACY_UNKNOWN should be resolved", "warning"
        ))

    # ── Time Window Validation ────────────────────────────────────────────────
    if metadata.time_window == TimeWindow.UNKNOWN:
        violations.append(ValidationViolation(
            "time_window", "Time window UNKNOWN should be resolved", "warning"
        ))

    # ── Output Type Validation ────────────────────────────────────────────────
    if metadata.output_type == OutputType.UNKNOWN:
        violations.append(ValidationViolation(
            "output_type", "Output type UNKNOWN should be resolved", "warning"
        ))

    # ── Source Validation ─────────────────────────────────────────────────────
    # An empty or None resolver is just as unspecified as the "unknown"
    # sentinel, so it is rejected the same way the key check above does.
    resolver = metadata.source.resolver
    if not resolver or resolver == "unknown":
        violations.append(ValidationViolation(
            "source.resolver", "Resolver function must be specified", "error"
        ))

    # ── Deprecation Validation ────────────────────────────────────────────────
    if metadata.deprecated and not metadata.replacement:
        violations.append(ValidationViolation(
            "replacement", "Deprecated placeholder should have replacement", "warning"
        ))

    return violations


# ── Registry ──────────────────────────────────────────────────────────────────

class PlaceholderMetadataRegistry:
    """
    Central registry for all placeholder metadata.

    This registry ensures all placeholders have complete metadata and serves
    as the single source of truth for the export system.
    """

    def __init__(self):
        # Maps placeholder key -> metadata; a later registration under the
        # same key replaces the earlier one.
        self._registry: Dict[str, PlaceholderMetadata] = {}

    def register(self, metadata: PlaceholderMetadata, validate: bool = True) -> None:
        """
        Register placeholder metadata.

        Args:
            metadata: PlaceholderMetadata instance
            validate: Whether to validate before registering

        Raises:
            ValueError: If validation fails with errors
        """
        if validate:
            # Warnings never block registration; only "error" violations do.
            errors = [v for v in validate_metadata(metadata) if v.severity == "error"]
            if errors:
                error_msg = "\n".join(f" - {v.field}: {v.issue}" for v in errors)
                raise ValueError(f"Metadata validation failed:\n{error_msg}")

        self._registry[metadata.key] = metadata

    def get(self, key: str) -> Optional[PlaceholderMetadata]:
        """Get metadata by key, or None if the key is not registered."""
        return self._registry.get(key)

    def get_all(self) -> Dict[str, PlaceholderMetadata]:
        """Get all registered metadata as a shallow copy of the registry."""
        return self._registry.copy()

    def get_by_category(self) -> Dict[str, List[PlaceholderMetadata]]:
        """Get metadata grouped by category, in registration order."""
        by_category: Dict[str, List[PlaceholderMetadata]] = {}
        for metadata in self._registry.values():
            by_category.setdefault(metadata.category, []).append(metadata)
        return by_category

    def get_deprecated(self) -> List[PlaceholderMetadata]:
        """Get all deprecated placeholders."""
        return [m for m in self._registry.values() if m.deprecated]

    def get_by_type(self, ptype: PlaceholderType) -> List[PlaceholderMetadata]:
        """Get placeholders by type."""
        return [m for m in self._registry.values() if m.type == ptype]

    def count(self) -> int:
        """Count registered placeholders."""
        return len(self._registry)

    def validate_all(self) -> Dict[str, List[ValidationViolation]]:
        """
        Validate all registered placeholders.

        Returns dict mapping key to list of violations. Keys whose metadata
        is fully compliant are omitted.
        """
        results: Dict[str, List[ValidationViolation]] = {}
        for key, metadata in self._registry.items():
            violations = validate_metadata(metadata)
            if violations:
                results[key] = violations
        return results


# Global registry instance
METADATA_REGISTRY = PlaceholderMetadataRegistry()