feat: Placeholder Metadata V2 - Normative Implementation + ZIP Export Fix

MAJOR CHANGES:
- Enhanced metadata schema with 7 QA fields
- Deterministic derivation logic (no guessing)
- Conservative inference (prefer unknown over wrong)
- Real source tracking (skip safe wrappers)
- Legacy mismatch detection
- Activity quality filter policies
- Completeness scoring (0-100)
- Unresolved fields tracking
- Fixed ZIP/JSON export auth (query param support)
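The seven QA fields listed above can be pictured as a small standalone dataclass. This is an illustrative sketch only (field names come from the schema diff further down; the dataclass shape and defaults here are assumptions, not the actual PlaceholderMetadata class):

```python
from dataclasses import dataclass, field
from typing import List

# Hypothetical standalone sketch of the seven V2 QA fields.
@dataclass
class QAFields:
    schema_status: str = "draft"               # draft | validated | production
    provenance_confidence: str = "medium"      # low | medium | high
    contract_source: str = "inferred"          # inferred | documented | validated
    legacy_contract_mismatch: bool = False     # legacy description != implementation
    metadata_completeness_score: int = 0       # 0-100, calculated
    orphaned_placeholder: bool = False         # unused in any prompt/pipeline/chart
    unresolved_fields: List[str] = field(default_factory=list)

# Example: a placeholder that passed validation with a high completeness score
qa = QAFields(metadata_completeness_score=85, schema_status="validated")
```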

FILES CHANGED:
- backend/placeholder_metadata.py (schema extended)
- backend/placeholder_metadata_enhanced.py (NEW, 418 lines)
- backend/generate_complete_metadata_v2.py (NEW, 334 lines)
- backend/tests/test_placeholder_metadata_v2.py (NEW, 302 lines)
- backend/routers/prompts.py (V2 integration + auth fix)
- docs/PLACEHOLDER_METADATA_VALIDATION.md (NEW, 541 lines)

PROBLEMS FIXED:
✓ value_raw extraction (type-aware, JSON parsing)
✓ Units for dimensionless values (scores, correlations)
✓ Safe wrappers as sources (now skipped)
✓ Time window guessing (confidence flags)
✓ Legacy inconsistencies (marked with flag)
✓ Missing quality filters (activity placeholders)
✓ No completeness metric (0-100 score)
✓ Orphaned placeholders (tracked)
✓ Unresolved fields (explicit list)
✓ ZIP/JSON export auth (query token support for downloads)

AUTH FIX:
- export-catalog-zip now accepts token via query param (?token=xxx)
- export-values-extended now accepts token via query param
- Allows browser downloads without custom headers
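The token-resolution logic behind this fix can be sketched as a tiny helper (a minimal sketch of the `token or x_auth_token` fallback shown in the diff below; `resolve_auth_token` is a hypothetical name, not a function in the codebase):

```python
from typing import Optional

def resolve_auth_token(query_token: Optional[str], header_token: Optional[str]) -> Optional[str]:
    """Accept an auth token from either a ?token= query param or the X-Auth-Token header."""
    # Query parameter wins when both are present, mirroring `token or x_auth_token`
    return query_token or header_token

resolve_auth_token("abc", None)    # browser download link: ?token=abc
resolve_auth_token(None, "xyz")    # API client: X-Auth-Token header
```

Browsers cannot attach custom headers to a plain download link, which is why the query-param path is needed.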

Concept: docs/PLACEHOLDER_METADATA_REQUIREMENTS_V2_NORMATIVE.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Lars 2026-03-29 21:23:37 +02:00
parent 087e8dd885
commit 650313347f
6 changed files with 1698 additions and 31 deletions

backend/generate_complete_metadata_v2.py (new file)

@@ -0,0 +1,333 @@
"""
Complete Metadata Generation V2 - Quality Assured
This version applies strict quality controls and enhanced extraction logic.
"""
import sys
import json
from pathlib import Path
from datetime import datetime
sys.path.insert(0, str(Path(__file__).parent))
from placeholder_metadata import (
PlaceholderType,
TimeWindow,
OutputType,
SourceInfo,
QualityFilterPolicy,
ConfidenceLogic,
METADATA_REGISTRY
)
from placeholder_metadata_extractor import build_complete_metadata_registry
from placeholder_metadata_enhanced import (
extract_value_raw,
infer_unit_strict,
detect_time_window_precise,
resolve_real_source,
create_activity_quality_policy,
create_confidence_logic,
calculate_completeness_score
)
def apply_enhanced_corrections(registry):
    """
    Apply enhanced corrections with strict quality controls.
    This replaces heuristic guessing with deterministic derivation.
    """
    all_metadata = registry.get_all()
    for key, metadata in all_metadata.items():
        unresolved = []
        # ── 1. Fix value_raw ──────────────────────────────────────────────
        if metadata.value_display and metadata.value_display not in ['nicht verfügbar', '']:
            raw_val, success = extract_value_raw(
                metadata.value_display,
                metadata.output_type,
                metadata.type
            )
            if success:
                metadata.value_raw = raw_val
            else:
                metadata.value_raw = None
                unresolved.append('value_raw')
        # ── 2. Fix unit (strict) ──────────────────────────────────────────
        strict_unit = infer_unit_strict(
            key,
            metadata.description,
            metadata.output_type,
            metadata.type
        )
        # Only overwrite if we have a confident answer or existing is clearly wrong
        if strict_unit is not None:
            metadata.unit = strict_unit
        elif metadata.output_type in [OutputType.JSON, OutputType.MARKDOWN, OutputType.ENUM]:
            metadata.unit = None  # These never have units
        elif 'score' in key.lower() or 'correlation' in key.lower():
            metadata.unit = None  # Dimensionless
        # ── 3. Fix time_window (precise detection) ────────────────────────
        tw, is_certain, mismatch = detect_time_window_precise(
            key,
            metadata.description,
            metadata.source.resolver,
            metadata.semantic_contract
        )
        if is_certain:
            metadata.time_window = tw
            if mismatch:
                metadata.legacy_contract_mismatch = True
                if mismatch not in metadata.known_issues:
                    metadata.known_issues.append(mismatch)
        else:
            metadata.time_window = tw
            if tw == TimeWindow.UNKNOWN:
                unresolved.append('time_window')
            else:
                # Inferred but not certain
                if mismatch and mismatch not in metadata.notes:
                    metadata.notes.append(f"Time window inferred: {mismatch}")
        # ── 4. Fix source provenance ──────────────────────────────────────
        func, dl_module, tables, source_kind = resolve_real_source(metadata.source.resolver)
        if func:
            metadata.source.function = func
        if dl_module:
            metadata.source.data_layer_module = dl_module
        if tables:
            metadata.source.source_tables = tables
        metadata.source.source_kind = source_kind
        if source_kind in ("wrapper", "unknown"):
            unresolved.append('source')
        # ── 5. Add quality_filter_policy for activity placeholders ────────
        if not metadata.quality_filter_policy:
            qfp = create_activity_quality_policy(key)
            if qfp:
                metadata.quality_filter_policy = qfp
        # ── 6. Add confidence_logic ───────────────────────────────────────
        if not metadata.confidence_logic:
            cl = create_confidence_logic(key, metadata.source.data_layer_module)
            if cl:
                metadata.confidence_logic = cl
        # ── 7. Determine provenance_confidence ────────────────────────────
        if metadata.source.data_layer_module and metadata.source.source_tables:
            metadata.provenance_confidence = "high"
        elif metadata.source.function or metadata.source.source_tables:
            metadata.provenance_confidence = "medium"
        else:
            metadata.provenance_confidence = "low"
        # ── 8. Determine contract_source ──────────────────────────────────
        if metadata.semantic_contract and len(metadata.semantic_contract) > 50:
            metadata.contract_source = "documented"
        elif metadata.description:
            metadata.contract_source = "inferred"
        else:
            metadata.contract_source = "unknown"
        # ── 9. Check for orphaned placeholders ────────────────────────────
        if not metadata.used_by.prompts and not metadata.used_by.pipelines and not metadata.used_by.charts:
            metadata.orphaned_placeholder = True
        # ── 10. Set unresolved fields ─────────────────────────────────────
        metadata.unresolved_fields = unresolved
        # ── 11. Calculate completeness score ──────────────────────────────
        metadata.metadata_completeness_score = calculate_completeness_score(metadata.to_dict())
        # ── 12. Set schema status ─────────────────────────────────────────
        if metadata.metadata_completeness_score >= 80 and len(unresolved) == 0:
            metadata.schema_status = "validated"
        elif metadata.metadata_completeness_score >= 50:
            metadata.schema_status = "draft"
        else:
            metadata.schema_status = "incomplete"
    return registry
def generate_qa_report(registry) -> str:
    """
    Generate QA report with quality metrics.
    """
    all_metadata = registry.get_all()
    total = len(all_metadata)
    # Collect metrics
    category_unknown = sum(1 for m in all_metadata.values() if m.category == "Unknown")
    no_description = sum(1 for m in all_metadata.values() if not m.description or "No description" in m.description)
    tw_unknown = sum(1 for m in all_metadata.values() if m.time_window == TimeWindow.UNKNOWN)
    no_quality_filter = sum(1 for m in all_metadata.values() if not m.quality_filter_policy and 'activity' in m.key.lower())
    no_confidence = sum(1 for m in all_metadata.values() if not m.confidence_logic and m.source.data_layer_module)
    legacy_mismatch = sum(1 for m in all_metadata.values() if m.legacy_contract_mismatch)
    orphaned = sum(1 for m in all_metadata.values() if m.orphaned_placeholder)
    # Find problematic placeholders
    problematic = []
    for key, m in all_metadata.items():
        score = m.metadata_completeness_score
        unresolved_count = len(m.unresolved_fields)
        issues_count = len(m.known_issues)
        problem_score = (100 - score) + (unresolved_count * 10) + (issues_count * 5)
        if problem_score > 0:
            problematic.append((key, problem_score, score, unresolved_count, issues_count))
    problematic.sort(key=lambda x: x[1], reverse=True)
    # Build report
    lines = [
        "# Placeholder Metadata QA Report",
        "",
        f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"**Total Placeholders:** {total}",
        "",
        "## Quality Metrics",
        "",
        f"- **Category Unknown:** {category_unknown} ({category_unknown/total*100:.1f}%)",
        f"- **No Description:** {no_description} ({no_description/total*100:.1f}%)",
        f"- **Time Window Unknown:** {tw_unknown} ({tw_unknown/total*100:.1f}%)",
        f"- **Activity without Quality Filter:** {no_quality_filter}",
        f"- **Data Layer without Confidence Logic:** {no_confidence}",
        f"- **Legacy/Implementation Mismatch:** {legacy_mismatch}",
        f"- **Orphaned (unused):** {orphaned}",
        "",
        "## Completeness Distribution",
        "",
    ]
    # Completeness buckets
    buckets = {
        "90-100%": sum(1 for m in all_metadata.values() if m.metadata_completeness_score >= 90),
        "70-89%": sum(1 for m in all_metadata.values() if 70 <= m.metadata_completeness_score < 90),
        "50-69%": sum(1 for m in all_metadata.values() if 50 <= m.metadata_completeness_score < 70),
        "0-49%": sum(1 for m in all_metadata.values() if m.metadata_completeness_score < 50),
    }
    for bucket, count in buckets.items():
        lines.append(f"- **{bucket}:** {count} placeholders ({count/total*100:.1f}%)")
    lines.append("")
    lines.append("## Top 20 Most Problematic Placeholders")
    lines.append("")
    lines.append("| Rank | Placeholder | Completeness | Unresolved | Issues |")
    lines.append("|------|-------------|--------------|------------|--------|")
    for i, (key, _, score, unresolved_count, issues_count) in enumerate(problematic[:20], 1):
        lines.append(f"| {i} | `{{{{{key}}}}}` | {score}% | {unresolved_count} | {issues_count} |")
    lines.append("")
    lines.append("## Schema Status Distribution")
    lines.append("")
    status_counts = {}
    for m in all_metadata.values():
        status_counts[m.schema_status] = status_counts.get(m.schema_status, 0) + 1
    for status, count in sorted(status_counts.items()):
        lines.append(f"- **{status}:** {count} ({count/total*100:.1f}%)")
    return "\n".join(lines)
def generate_unresolved_report(registry) -> dict:
    """
    Generate unresolved fields report as JSON.
    """
    all_metadata = registry.get_all()
    unresolved_by_placeholder = {}
    unresolved_by_field = {}
    for key, m in all_metadata.items():
        if m.unresolved_fields:
            unresolved_by_placeholder[key] = m.unresolved_fields
            for field in m.unresolved_fields:
                if field not in unresolved_by_field:
                    unresolved_by_field[field] = []
                unresolved_by_field[field].append(key)
    return {
        "generated_at": datetime.now().isoformat(),
        "total_placeholders_with_unresolved": len(unresolved_by_placeholder),
        "by_placeholder": unresolved_by_placeholder,
        "by_field": unresolved_by_field,
        "summary": {
            field: len(placeholders)
            for field, placeholders in unresolved_by_field.items()
        }
    }
def main():
    """Main execution."""
    print("=" * 60)
    print("ENHANCED PLACEHOLDER METADATA GENERATION V2")
    print("=" * 60)
    print()
    try:
        # Build registry
        print("Building metadata registry...")
        registry = build_complete_metadata_registry()
        print(f"Loaded {registry.count()} placeholders")
        print()
        # Apply enhanced corrections
        print("Applying enhanced corrections...")
        registry = apply_enhanced_corrections(registry)
        print("Enhanced corrections applied")
        print()
        # Generate reports
        print("Generating QA report...")
        qa_report = generate_qa_report(registry)
        qa_path = Path(__file__).parent.parent / "docs" / "PLACEHOLDER_METADATA_QA_REPORT.md"
        with open(qa_path, 'w', encoding='utf-8') as f:
            f.write(qa_report)
        print(f"QA Report: {qa_path}")
        print("Generating unresolved report...")
        unresolved = generate_unresolved_report(registry)
        unresolved_path = Path(__file__).parent.parent / "docs" / "PLACEHOLDER_METADATA_UNRESOLVED.json"
        with open(unresolved_path, 'w', encoding='utf-8') as f:
            json.dump(unresolved, f, indent=2, ensure_ascii=False)
        print(f"Unresolved Report: {unresolved_path}")
        # Summary
        all_metadata = registry.get_all()
        avg_completeness = sum(m.metadata_completeness_score for m in all_metadata.values()) / len(all_metadata)
        validated_count = sum(1 for m in all_metadata.values() if m.schema_status == "validated")
        print()
        print("=" * 60)
        print("SUMMARY")
        print("=" * 60)
        print(f"Total Placeholders: {len(all_metadata)}")
        print(f"Average Completeness: {avg_completeness:.1f}%")
        print(f"Validated: {validated_count} ({validated_count/len(all_metadata)*100:.1f}%)")
        print(f"Time Window Unknown: {sum(1 for m in all_metadata.values() if m.time_window == TimeWindow.UNKNOWN)}")
        print(f"Orphaned: {sum(1 for m in all_metadata.values() if m.orphaned_placeholder)}")
        return 0
    except Exception as e:
        print(f"\nERROR: {e}")
        import traceback
        traceback.print_exc()
        return 1


if __name__ == "__main__":
    sys.exit(main())

backend/placeholder_metadata.py

@@ -85,6 +85,10 @@ class QualityFilterPolicy:
     min_data_points: Optional[int] = None
     min_confidence: Optional[ConfidenceLevel] = None
     filter_criteria: Optional[str] = None
+    default_filter_level: Optional[str] = None  # e.g., "quality", "acceptable", "all"
+    null_quality_handling: Optional[str] = None  # e.g., "exclude", "include_as_uncategorized"
+    includes_poor: bool = False  # Whether poor quality data is included
+    includes_excluded: bool = False  # Whether excluded data is included
     notes: Optional[str] = None

@@ -105,6 +109,8 @@ class SourceInfo:
     function: Optional[str] = None  # Data layer function called
     data_layer_module: Optional[str] = None  # Data layer module (e.g., body_metrics.py)
     source_tables: List[str] = field(default_factory=list)  # Database tables
+    source_kind: str = "computed"  # direct | computed | aggregated | derived | interpreted
+    code_reference: Optional[str] = None  # Line reference (e.g., "placeholder_resolver.py:1083")

 @dataclass
@@ -169,6 +175,15 @@ class PlaceholderMetadata:
     known_issues: List[str] = field(default_factory=list)
     notes: List[str] = field(default_factory=list)
+    # ── Quality Assurance (Extended) ──────────────────────────────────────
+    schema_status: str = "draft"  # draft | validated | production
+    provenance_confidence: str = "medium"  # low | medium | high
+    contract_source: str = "inferred"  # inferred | documented | validated
+    legacy_contract_mismatch: bool = False  # True if legacy description != implementation
+    metadata_completeness_score: int = 0  # 0-100, calculated
+    orphaned_placeholder: bool = False  # True if not used in any prompt/pipeline/chart
+    unresolved_fields: List[str] = field(default_factory=list)  # Fields that couldn't be resolved

     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary with enum handling."""
         result = asdict(self)
backend/placeholder_metadata_enhanced.py (new file)

@@ -0,0 +1,417 @@
"""
Enhanced Placeholder Metadata Extraction
Improved extraction logic that addresses quality issues:
1. Correct value_raw extraction
2. Accurate unit inference
3. Precise time_window detection
4. Real source provenance
5. Quality filter policies for activity placeholders
"""
import re
import json
from typing import Any, Optional, Tuple, Dict
from placeholder_metadata import (
PlaceholderType,
TimeWindow,
OutputType,
QualityFilterPolicy,
ConfidenceLogic,
ConfidenceLevel
)
# ── Enhanced Value Raw Extraction ─────────────────────────────────────────────

def extract_value_raw(value_display: str, output_type: OutputType, placeholder_type: PlaceholderType) -> Tuple[Any, bool]:
    """
    Extract raw value from display string.
    Returns: (raw_value, success)
    """
    if not value_display or value_display in ['nicht verfügbar', 'nicht genug Daten']:
        return None, True
    # JSON output type
    if output_type == OutputType.JSON:
        try:
            return json.loads(value_display), True
        except (json.JSONDecodeError, TypeError):
            # Try to find JSON embedded in the string
            json_match = re.search(r'(\{.*\}|\[.*\])', value_display, re.DOTALL)
            if json_match:
                try:
                    return json.loads(json_match.group(1)), True
                except json.JSONDecodeError:
                    pass
            return None, False
    # Markdown output type
    if output_type == OutputType.MARKDOWN:
        return value_display, True
    # Number types
    if output_type in [OutputType.NUMBER, OutputType.INTEGER]:
        # Extract first number from string
        match = re.search(r'([-+]?\d+\.?\d*)', value_display)
        if match:
            val = float(match.group(1))
            return int(val) if output_type == OutputType.INTEGER else val, True
        return None, False
    # Date
    if output_type == OutputType.DATE:
        # Check if already ISO format
        if re.match(r'\d{4}-\d{2}-\d{2}', value_display):
            return value_display, True
        return value_display, False  # Unknown format
    # String/Enum - return as-is
    return value_display, True
# ── Enhanced Unit Inference ───────────────────────────────────────────────────

def infer_unit_strict(key: str, description: str, output_type: OutputType, placeholder_type: PlaceholderType) -> Optional[str]:
    """
    Strict unit inference - only return unit if certain.
    NO units for:
    - Scores (dimensionless)
    - Correlations (dimensionless)
    - Percentages expressed as 0-100 scale
    - Classifications/enums
    - JSON/Markdown outputs
    """
    key_lower = key.lower()
    desc_lower = description.lower()
    # JSON/Markdown never have units
    if output_type in [OutputType.JSON, OutputType.MARKDOWN, OutputType.ENUM]:
        return None
    # Scores are dimensionless (0-100 scale)
    if 'score' in key_lower or 'adequacy' in key_lower:
        return None
    # Correlations are dimensionless
    if 'correlation' in key_lower:
        return None
    # Ratios/percentages on 0-100 scale
    if any(x in key_lower for x in ['pct', 'ratio', 'balance', 'compliance', 'consistency']):
        return None
    # Classifications/quadrants
    if 'quadrant' in key_lower or 'classification' in key_lower:
        return None
    # Weight/mass
    if any(x in key_lower for x in ['weight', 'gewicht', 'fm_', 'lbm_', 'masse']):
        return 'kg'
    # Circumferences/lengths
    if any(x in key_lower for x in ['umfang', 'waist', 'hip', 'chest', 'arm', 'leg', 'delta']) and 'circumference' in desc_lower:
        return 'cm'
    # Time durations
    if any(x in key_lower for x in ['duration', 'dauer', 'debt']):
        if 'hours' in desc_lower or 'stunden' in desc_lower:
            return 'Stunden'
        elif 'minutes' in desc_lower or 'minuten' in desc_lower:
            return 'Minuten'
        return None  # Unclear
    # Heart rate
    if 'rhr' in key_lower or ('hr' in key_lower and 'hrv' not in key_lower) or 'puls' in key_lower:
        return 'bpm'
    # HRV
    if 'hrv' in key_lower:
        return 'ms'
    # VO2 Max
    if 'vo2' in key_lower:
        return 'ml/kg/min'
    # Calories/energy
    if 'kcal' in key_lower or 'energy' in key_lower or 'energie' in key_lower:
        return 'kcal'
    # Macros (protein, carbs, fat)
    if any(x in key_lower for x in ['protein', 'carb', 'fat', 'kohlenhydrat', 'fett']) and 'g' in desc_lower:
        return 'g'
    # Height
    if 'height' in key_lower or 'größe' in key_lower:
        return 'cm'
    # Age
    if 'age' in key_lower or 'alter' in key_lower:
        return 'Jahre'
    # BMI is dimensionless
    if 'bmi' in key_lower:
        return None
    # Default: no unit (conservative)
    return None
# ── Enhanced Time Window Detection ────────────────────────────────────────────

def detect_time_window_precise(
    key: str,
    description: str,
    resolver_name: str,
    semantic_contract: str
) -> Tuple[TimeWindow, bool, Optional[str]]:
    """
    Detect time window with precision.
    Returns: (time_window, is_certain, mismatch_note)
    """
    key_lower = key.lower()
    desc_lower = description.lower()
    contract_lower = semantic_contract.lower()
    # Explicit suffixes (highest confidence)
    if '_7d' in key_lower:
        return TimeWindow.DAYS_7, True, None
    if '_14d' in key_lower:
        return TimeWindow.DAYS_14, True, None
    if '_28d' in key_lower:
        return TimeWindow.DAYS_28, True, None
    if '_30d' in key_lower:
        return TimeWindow.DAYS_30, True, None
    if '_90d' in key_lower:
        return TimeWindow.DAYS_90, True, None
    if '_3d' in key_lower:
        return TimeWindow.DAYS_7, True, None  # Map 3d to closest standard window
    # Latest/current
    if any(x in key_lower for x in ['aktuell', 'latest', 'current', 'letzter']):
        return TimeWindow.LATEST, True, None
    # Check semantic contract for time window info
    if '7 tag' in contract_lower or '7d' in contract_lower:
        # Check for description mismatch
        mismatch = None
        if '30' in desc_lower or '28' in desc_lower:
            mismatch = "Description says 30d/28d but implementation is 7d"
        return TimeWindow.DAYS_7, True, mismatch
    if '28 tag' in contract_lower or '28d' in contract_lower:
        mismatch = None
        if '7' in desc_lower and '28' not in desc_lower:
            mismatch = "Description says 7d but implementation is 28d"
        return TimeWindow.DAYS_28, True, mismatch
    if '30 tag' in contract_lower or '30d' in contract_lower:
        return TimeWindow.DAYS_30, True, None
    if '90 tag' in contract_lower or '90d' in contract_lower:
        return TimeWindow.DAYS_90, True, None
    # Check description patterns
    if 'letzte 7' in desc_lower or '7 tag' in desc_lower:
        return TimeWindow.DAYS_7, False, None
    if 'letzte 30' in desc_lower or '30 tag' in desc_lower:
        return TimeWindow.DAYS_30, False, None
    # Averages typically 30d unless specified
    if 'avg' in key_lower or 'durchschn' in key_lower:
        if '7' in desc_lower:
            return TimeWindow.DAYS_7, False, None
        return TimeWindow.DAYS_30, False, "Assumed 30d for average (not explicit)"
    # Trends typically 28d
    if 'trend' in key_lower:
        return TimeWindow.DAYS_28, False, "Assumed 28d for trend"
    # Week-based
    if 'week' in key_lower or 'woche' in key_lower:
        return TimeWindow.DAYS_7, False, None
    # Profile data is latest
    if key_lower in ['name', 'age', 'height', 'geschlecht']:
        return TimeWindow.LATEST, True, None
    # Unknown
    return TimeWindow.UNKNOWN, False, "Could not determine time window from code or documentation"
# ── Enhanced Source Provenance ────────────────────────────────────────────────

def resolve_real_source(resolver_name: str) -> Tuple[Optional[str], Optional[str], list, str]:
    """
    Resolve real source function (not safe wrappers).
    Returns: (function, data_layer_module, source_tables, source_kind)
    """
    # Skip safe wrappers - they're not real sources
    if resolver_name in ['_safe_int', '_safe_float', '_safe_json', '_safe_str']:
        return None, None, [], "wrapper"
    # Direct mappings to data layer
    source_map = {
        # Body metrics
        'get_latest_weight': ('get_latest_weight_data', 'body_metrics', ['weight_log'], 'direct'),
        'get_weight_trend': ('get_weight_trend_data', 'body_metrics', ['weight_log'], 'computed'),
        'get_latest_bf': ('get_body_composition_data', 'body_metrics', ['caliper_log'], 'direct'),
        'get_circ_summary': ('get_circumference_summary_data', 'body_metrics', ['circumference_log'], 'aggregated'),
        'get_caliper_summary': ('get_body_composition_data', 'body_metrics', ['caliper_log'], 'aggregated'),
        'calculate_bmi': (None, None, ['weight_log', 'profiles'], 'computed'),
        # Nutrition
        'get_nutrition_avg': ('get_nutrition_average_data', 'nutrition_metrics', ['nutrition_log'], 'aggregated'),
        'get_protein_per_kg': ('get_protein_targets_data', 'nutrition_metrics', ['nutrition_log', 'weight_log'], 'computed'),
        'get_nutrition_days': ('get_nutrition_days_data', 'nutrition_metrics', ['nutrition_log'], 'computed'),
        # Activity
        'get_activity_summary': ('get_activity_summary_data', 'activity_metrics', ['activity_log', 'training_types'], 'aggregated'),
        'get_activity_detail': ('get_activity_detail_data', 'activity_metrics', ['activity_log', 'training_types'], 'aggregated'),
        'get_training_type_dist': ('get_training_type_distribution_data', 'activity_metrics', ['activity_log', 'training_types'], 'aggregated'),
        # Sleep
        'get_sleep_duration': ('get_sleep_duration_data', 'recovery_metrics', ['sleep_log'], 'aggregated'),
        'get_sleep_quality': ('get_sleep_quality_data', 'recovery_metrics', ['sleep_log'], 'computed'),
        # Vitals
        'get_resting_hr': ('get_resting_heart_rate_data', 'health_metrics', ['vitals_baseline'], 'direct'),
        'get_hrv': ('get_heart_rate_variability_data', 'health_metrics', ['vitals_baseline'], 'direct'),
        'get_vo2_max': ('get_vo2_max_data', 'health_metrics', ['vitals_baseline'], 'direct'),
        # Profile
        'get_profile_data': (None, None, ['profiles'], 'direct'),
        'calculate_age': (None, None, ['profiles'], 'computed'),
        # Goals
        'get_goal_weight': (None, None, ['goals'], 'direct'),
        'get_goal_bf_pct': (None, None, ['goals'], 'direct'),
    }
    if resolver_name in source_map:
        return source_map[resolver_name]
    # Goals formatting functions
    if resolver_name.startswith('_format_goals'):
        return (None, None, ['goals', 'goal_focus_contributions'], 'interpreted')
    # Unknown
    return None, None, [], "unknown"
# ── Quality Filter Policy for Activity Placeholders ───────────────────────────

def create_activity_quality_policy(key: str) -> Optional[QualityFilterPolicy]:
    """
    Create quality filter policy for activity-related placeholders.
    """
    key_lower = key.lower()
    # Activity-related placeholders need quality policies
    if any(x in key_lower for x in ['activity', 'training', 'load', 'volume', 'quality_session', 'ability']):
        return QualityFilterPolicy(
            enabled=True,
            default_filter_level="quality",
            null_quality_handling="exclude",
            includes_poor=False,
            includes_excluded=False,
            notes="Activity metrics filter for quality='quality' by default. NULL quality_label excluded."
        )
    return None
# ── Confidence Logic Creation ─────────────────────────────────────────────────

def create_confidence_logic(key: str, data_layer_module: Optional[str]) -> Optional[ConfidenceLogic]:
    """
    Create confidence logic if applicable.
    """
    key_lower = key.lower()
    # Data layer functions typically have confidence
    if data_layer_module:
        return ConfidenceLogic(
            supported=True,
            calculation="Based on data availability and quality thresholds",
            thresholds={"min_data_points": 1},
            notes=f"Confidence determined by {data_layer_module}"
        )
    # Scores have implicit confidence
    if 'score' in key_lower:
        return ConfidenceLogic(
            supported=True,
            calculation="Based on data completeness for score components",
            notes="Score confidence correlates with input data availability"
        )
    # Correlations have confidence
    if 'correlation' in key_lower:
        return ConfidenceLogic(
            supported=True,
            calculation="Pearson correlation with significance testing",
            thresholds={"min_data_points": 7},
            notes="Requires minimum 7 data points for meaningful correlation"
        )
    return None
# ── Metadata Completeness Score ───────────────────────────────────────────────

def calculate_completeness_score(metadata_dict: Dict) -> int:
    """
    Calculate metadata completeness score (0-100).
    Checks:
    - Required fields filled
    - Time window not unknown
    - Output type not unknown
    - Unit specified (if applicable)
    - Source provenance complete
    - Quality/confidence policies (if applicable)
    """
    score = 0
    max_score = 100
    # Required fields (30 points)
    if metadata_dict.get('category') and metadata_dict['category'] != 'Unknown':
        score += 5
    if metadata_dict.get('description') and 'No description' not in metadata_dict['description']:
        score += 5
    if metadata_dict.get('semantic_contract'):
        score += 10
    if metadata_dict.get('source', {}).get('resolver') and metadata_dict['source']['resolver'] != 'unknown':
        score += 10
    # Type specification (20 points)
    if metadata_dict.get('type') and metadata_dict['type'] != 'legacy_unknown':
        score += 10
    if metadata_dict.get('time_window') and metadata_dict['time_window'] != 'unknown':
        score += 10
    # Output specification (20 points)
    if metadata_dict.get('output_type') and metadata_dict['output_type'] != 'unknown':
        score += 10
    if metadata_dict.get('format_hint'):
        score += 10
    # Source provenance (20 points)
    source = metadata_dict.get('source', {})
    if source.get('data_layer_module'):
        score += 10
    if source.get('source_tables'):
        score += 10
    # Quality policies (10 points)
    if metadata_dict.get('quality_filter_policy'):
        score += 5
    if metadata_dict.get('confidence_logic'):
        score += 5
    return min(score, max_score)

backend/routers/prompts.py

@@ -267,22 +267,43 @@ def export_placeholder_values(session: dict = Depends(require_auth)):

 @router.get("/placeholders/export-values-extended")
-def export_placeholder_values_extended(session: dict = Depends(require_auth)):
+def export_placeholder_values_extended(
+    token: Optional[str] = Query(None),
+    x_auth_token: Optional[str] = Header(default=None)
+):
     """
-    Extended placeholder export with complete normative metadata.
+    Extended placeholder export with complete normative metadata V2.
     Returns structured export with:
     - Legacy format (for backward compatibility)
-    - Complete metadata per placeholder (normative standard)
+    - Complete metadata per placeholder (normative standard V2)
+    - Quality assurance metrics
     - Summary statistics
     - Gap report
     - Validation results
-    This endpoint implements the PLACEHOLDER_METADATA_REQUIREMENTS_V2_NORMATIVE standard.
+    V2 implements strict quality controls:
+    - Correct value_raw extraction
+    - Accurate unit inference
+    - Precise time_window detection
+    - Real source provenance
+    - Quality filter policies for activity placeholders
+    Token can be passed via:
+    - Header: X-Auth-Token
+    - Query param: ?token=xxx (for direct access/downloads)
     """
     from datetime import datetime
     from placeholder_metadata_extractor import build_complete_metadata_registry
-    from generate_complete_metadata import apply_manual_corrections, generate_gap_report
+    from generate_complete_metadata_v2 import apply_enhanced_corrections
+    from auth import get_session
+    # Accept token from query param OR header
+    auth_token = token or x_auth_token
+    session = get_session(auth_token)
+    if not session:
+        raise HTTPException(401, "Nicht eingeloggt")  # "Not logged in"
     profile_id = session['profile_id']

@@ -294,10 +315,10 @@
     }

     catalog = get_placeholder_catalog(profile_id)
-    # Build complete metadata registry
+    # Build complete metadata registry with V2 enhancements
     try:
         registry = build_complete_metadata_registry(profile_id)
-        registry = apply_manual_corrections(registry)
+        registry = apply_enhanced_corrections(registry)  # V2: Enhanced quality controls
     except Exception as e:
         raise HTTPException(
             status_code=500,

@@ -307,26 +328,26 @@
     # Get all metadata
     all_metadata = registry.get_all()

-    # Populate runtime values (value_display, value_raw, available)
+    # Populate runtime values with V2 enhanced extraction
+    from placeholder_metadata_enhanced import extract_value_raw as extract_value_raw_v2
     for key, metadata in all_metadata.items():
         if key in cleaned_values:
             value = cleaned_values[key]
             metadata.value_display = str(value)
-            # Try to extract raw value
-            if isinstance(value, (int, float)):
-                metadata.value_raw = value
-            elif isinstance(value, str):
-                # Try to parse number from string (e.g., "85.8 kg" -> 85.8)
-                import re
-                match = re.search(r'([-+]?\d+\.?\d*)', value)
-                if match:
-                    try:
-                        metadata.value_raw = float(match.group(1))
-                    except ValueError:
-                        metadata.value_raw = value
-                else:
-                    metadata.value_raw = value
+            # V2: Use enhanced extraction logic
+            raw_val, success = extract_value_raw_v2(
+                str(value),
+                metadata.output_type,
+                metadata.type
+            )
+            if success:
+                metadata.value_raw = raw_val
+            else:
+                metadata.value_raw = None
+                if 'value_raw' not in metadata.unresolved_fields:
+                    metadata.unresolved_fields.append('value_raw')
             # Check availability
             if value in ['nicht verfügbar', 'nicht genug Daten', '[Fehler:', '[Nicht']:

@@ -336,8 +357,15 @@
             metadata.available = False
             metadata.missing_reason = "Placeholder not in resolver output"

-    # Generate gap report
-    gaps = generate_gap_report(registry)
+    # Generate gap report (collect unresolved fields)
+    gaps = {
+        'unknown_time_window': [k for k, m in all_metadata.items() if m.time_window == TimeWindow.UNKNOWN],
+        'unknown_output_type': [k for k, m in all_metadata.items() if m.output_type == OutputType.UNKNOWN],
+        'legacy_unknown_type': [k for k, m in all_metadata.items() if m.type == PlaceholderType.LEGACY_UNKNOWN],
+        'unresolved_fields': {k: m.unresolved_fields for k, m in all_metadata.items() if m.unresolved_fields},
+        'legacy_mismatches': [k for k, m in all_metadata.items() if m.legacy_contract_mismatch],
+        'orphaned': [k for k, m in all_metadata.items() if m.orphaned_placeholder],
+    }

     # Validation
     validation_results = registry.validate_all()

@@ -394,28 +422,47 @@
         m.to_dict() for m in metadata_list
     ]

-    # Fill summary
+    # Fill summary with V2 QA metrics
     total = len(all_metadata)
     available = sum(1 for m in all_metadata.values() if m.available)
     missing = total - available

     by_type = {}
+    by_schema_status = {}
     for metadata in all_metadata.values():
         ptype = metadata.type.value
         by_type[ptype] = by_type.get(ptype, 0) + 1
-    gap_count = sum(len(v) for v in gaps.values())
-    unresolved = len(gaps.get('validation_issues', []))
+        status = metadata.schema_status
+        by_schema_status[status] = by_schema_status.get(status, 0) + 1
+
+    # Calculate average completeness
+    avg_completeness = sum(m.metadata_completeness_score for m in all_metadata.values()) / total if total > 0 else 0
# Count QA metrics
legacy_mismatches = sum(1 for m in all_metadata.values() if m.legacy_contract_mismatch)
orphaned = sum(1 for m in all_metadata.values() if m.orphaned_placeholder)
has_quality_filter = sum(1 for m in all_metadata.values() if m.quality_filter_policy)
has_confidence = sum(1 for m in all_metadata.values() if m.confidence_logic)
export_data['metadata']['summary'] = { export_data['metadata']['summary'] = {
"total_placeholders": total, "total_placeholders": total,
"available": available, "available": available,
"missing": missing, "missing": missing,
"by_type": by_type, "by_type": by_type,
"by_schema_status": by_schema_status,
"quality_metrics": {
"average_completeness_score": round(avg_completeness, 1),
"legacy_mismatches": legacy_mismatches,
"orphaned": orphaned,
"with_quality_filter": has_quality_filter,
"with_confidence_logic": has_confidence
},
"coverage": { "coverage": {
"fully_resolved": total - gap_count, "time_window_unknown": len(gaps.get('unknown_time_window', [])),
"partially_resolved": gap_count - unresolved, "output_type_unknown": len(gaps.get('unknown_output_type', [])),
"unresolved": unresolved "legacy_unknown_type": len(gaps.get('legacy_unknown_type', [])),
"with_unresolved_fields": len(gaps.get('unresolved_fields', {}))
} }
} }
@ -440,7 +487,7 @@ def export_placeholder_values_extended(session: dict = Depends(require_auth)):
@router.get("/placeholders/export-catalog-zip") @router.get("/placeholders/export-catalog-zip")
def export_placeholder_catalog_zip( def export_placeholder_catalog_zip(
token: Optional[str] = Query(None), token: Optional[str] = Query(None),
session: dict = Depends(require_admin) x_auth_token: Optional[str] = Header(default=None)
): ):
""" """
Export complete placeholder catalog as ZIP file. Export complete placeholder catalog as ZIP file.
@ -453,6 +500,10 @@ def export_placeholder_catalog_zip(
This generates the files on-the-fly and returns as ZIP. This generates the files on-the-fly and returns as ZIP.
Admin only. Admin only.
Token can be passed via:
- Header: X-Auth-Token
- Query param: ?token=xxx (for browser downloads)
""" """
import io import io
import zipfile import zipfile
@ -465,6 +516,16 @@ def export_placeholder_catalog_zip(
) )
from placeholder_metadata_extractor import build_complete_metadata_registry from placeholder_metadata_extractor import build_complete_metadata_registry
from generate_complete_metadata import apply_manual_corrections, generate_gap_report from generate_complete_metadata import apply_manual_corrections, generate_gap_report
from auth import get_session
# Accept token from query param OR header
auth_token = token or x_auth_token
session = get_session(auth_token)
if not session:
raise HTTPException(401, "Nicht eingeloggt")
if session['role'] != 'admin':
raise HTTPException(403, "Nur für Admins")
profile_id = session['profile_id'] profile_id = session['profile_id']
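The query-param fallback added here can be illustrated with a framework-free sketch; `sessions` stands in for the real `auth.get_session` lookup (an assumption), and the endpoint maps the two errors to HTTP 401/403:

```python
# Sketch of the export-endpoint auth precedence: query token first, header as fallback.
# `sessions` is a stand-in for the real auth.get_session lookup (assumption).

def resolve_export_session(sessions, token=None, x_auth_token=None):
    """Return the admin session for the given query or header token."""
    auth_token = token or x_auth_token  # query param takes precedence
    session = sessions.get(auth_token)
    if session is None:
        raise PermissionError("Nicht eingeloggt")   # -> HTTP 401 in the endpoint
    if session.get("role") != "admin":
        raise PermissionError("Nur für Admins")     # -> HTTP 403 in the endpoint
    return session

sessions = {"abc": {"role": "admin", "profile_id": 1},
            "xyz": {"role": "user", "profile_id": 2}}

assert resolve_export_session(sessions, token="abc")["profile_id"] == 1
assert resolve_export_session(sessions, x_auth_token="abc")["role"] == "admin"
assert resolve_export_session(sessions, token="abc", x_auth_token="xyz")["role"] == "admin"
```

A browser download link can then simply append `?token=...` instead of setting the `X-Auth-Token` header.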

backend/tests/test_placeholder_metadata_v2.py (new file)

@@ -0,0 +1,301 @@
"""
Tests for Enhanced Placeholder Metadata System V2
Tests the strict quality controls and enhanced extraction logic.
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
import pytest
from placeholder_metadata import (
PlaceholderType,
TimeWindow,
OutputType
)
from placeholder_metadata_enhanced import (
extract_value_raw,
infer_unit_strict,
detect_time_window_precise,
resolve_real_source,
create_activity_quality_policy,
calculate_completeness_score
)
# ── Value Raw Extraction Tests ────────────────────────────────────────────────
def test_value_raw_json():
"""JSON outputs must return actual JSON objects."""
# Valid JSON
val, success = extract_value_raw('{"goals": [1,2,3]}', OutputType.JSON, PlaceholderType.RAW_DATA)
assert success
assert isinstance(val, dict)
assert val == {"goals": [1,2,3]}
# JSON array
val, success = extract_value_raw('[1, 2, 3]', OutputType.JSON, PlaceholderType.RAW_DATA)
assert success
assert isinstance(val, list)
# Invalid JSON
val, success = extract_value_raw('not json', OutputType.JSON, PlaceholderType.RAW_DATA)
assert not success
assert val is None
def test_value_raw_number():
"""Numeric outputs must extract numbers without units."""
# Number with unit
val, success = extract_value_raw('85.8 kg', OutputType.NUMBER, PlaceholderType.ATOMIC)
assert success
assert val == 85.8
# Integer
val, success = extract_value_raw('42 Jahre', OutputType.INTEGER, PlaceholderType.ATOMIC)
assert success
assert val == 42
# Negative number
val, success = extract_value_raw('-12.5 kg', OutputType.NUMBER, PlaceholderType.ATOMIC)
assert success
assert val == -12.5
# No number
val, success = extract_value_raw('nicht verfügbar', OutputType.NUMBER, PlaceholderType.ATOMIC)
assert not success
def test_value_raw_markdown():
"""Markdown outputs keep as string."""
val, success = extract_value_raw('# Heading\nText', OutputType.MARKDOWN, PlaceholderType.RAW_DATA)
assert success
assert val == '# Heading\nText'
def test_value_raw_date():
"""Date outputs prefer ISO format."""
# ISO format
val, success = extract_value_raw('2026-03-29', OutputType.DATE, PlaceholderType.ATOMIC)
assert success
assert val == '2026-03-29'
# Non-ISO (still accepts but marks as uncertain)
val, success = extract_value_raw('29.03.2026', OutputType.DATE, PlaceholderType.ATOMIC)
assert not success # Unknown format
# ── Unit Inference Tests ──────────────────────────────────────────────────────
def test_unit_no_units_for_scores():
"""Scores are dimensionless (0-100 scale), no units."""
unit = infer_unit_strict('goal_progress_score', 'Progress score', OutputType.INTEGER, PlaceholderType.ATOMIC)
assert unit is None
unit = infer_unit_strict('protein_adequacy_28d', 'Protein adequacy', OutputType.INTEGER, PlaceholderType.ATOMIC)
assert unit is None
def test_unit_no_units_for_correlations():
"""Correlations are dimensionless."""
unit = infer_unit_strict('correlation_energy_weight', 'Correlation', OutputType.JSON, PlaceholderType.INTERPRETED)
assert unit is None
def test_unit_no_units_for_ratios():
"""Ratios and percentages are dimensionless."""
unit = infer_unit_strict('waist_hip_ratio', 'Waist-hip ratio', OutputType.NUMBER, PlaceholderType.ATOMIC)
assert unit is None
unit = infer_unit_strict('quality_sessions_pct', 'Quality sessions percentage', OutputType.INTEGER, PlaceholderType.ATOMIC)
assert unit is None
def test_unit_correct_units_for_measurements():
"""Physical measurements have correct units."""
# Weight
unit = infer_unit_strict('weight_aktuell', 'Aktuelles Gewicht', OutputType.NUMBER, PlaceholderType.ATOMIC)
assert unit == 'kg'
# Circumference
unit = infer_unit_strict('waist_28d_delta', 'Taillenumfang', OutputType.NUMBER, PlaceholderType.ATOMIC)
assert unit == 'cm'
# Heart rate
unit = infer_unit_strict('vitals_avg_hr', 'Ruhepuls', OutputType.INTEGER, PlaceholderType.ATOMIC)
assert unit == 'bpm'
# HRV
unit = infer_unit_strict('vitals_avg_hrv', 'HRV', OutputType.NUMBER, PlaceholderType.ATOMIC)
assert unit == 'ms'
def test_unit_no_units_for_json():
"""JSON outputs never have units."""
unit = infer_unit_strict('active_goals_json', 'Active goals', OutputType.JSON, PlaceholderType.RAW_DATA)
assert unit is None
# ── Time Window Detection Tests ───────────────────────────────────────────────
def test_time_window_explicit_suffix():
"""Explicit suffixes are most reliable."""
tw, certain, mismatch = detect_time_window_precise('weight_7d_median', '', '', '')
assert tw == TimeWindow.DAYS_7
assert certain == True
tw, certain, mismatch = detect_time_window_precise('protein_avg_28d', '', '', '')
assert tw == TimeWindow.DAYS_28
assert certain == True
def test_time_window_latest():
"""Latest/current keywords."""
tw, certain, mismatch = detect_time_window_precise('weight_aktuell', 'Aktuelles Gewicht', '', '')
assert tw == TimeWindow.LATEST
assert certain == True
def test_time_window_from_contract():
"""Time window from semantic contract."""
contract = 'Berechnet aus weight_log über 7 Tage'
tw, certain, mismatch = detect_time_window_precise('weight_avg', '', '', contract)
assert tw == TimeWindow.DAYS_7
assert certain == True
def test_time_window_legacy_mismatch():
"""Detect legacy description mismatch."""
description = 'Durchschnitt 30 Tage'
contract = 'Berechnet über 7 Tage'
tw, certain, mismatch = detect_time_window_precise('weight_avg', description, '', contract)
assert tw == TimeWindow.DAYS_7 # Implementation wins
assert mismatch is not None
def test_time_window_unknown():
"""Returns unknown if cannot determine."""
tw, certain, mismatch = detect_time_window_precise('some_metric', '', '', '')
assert tw == TimeWindow.UNKNOWN
assert certain == False
# ── Source Provenance Tests ───────────────────────────────────────────────────
def test_source_skip_safe_wrappers():
"""Safe wrappers are not real sources."""
func, module, tables, kind = resolve_real_source('_safe_int')
assert func is None
assert module is None
assert kind == "wrapper"
def test_source_real_data_layer():
"""Real data layer sources."""
func, module, tables, kind = resolve_real_source('get_latest_weight')
assert func == 'get_latest_weight_data'
assert module == 'body_metrics'
assert 'weight_log' in tables
assert kind == 'direct'
def test_source_computed():
"""Computed sources."""
func, module, tables, kind = resolve_real_source('calculate_bmi')
assert 'weight_log' in tables
assert 'profiles' in tables
assert kind == 'computed'
def test_source_aggregated():
"""Aggregated sources."""
func, module, tables, kind = resolve_real_source('get_nutrition_avg')
assert func == 'get_nutrition_average_data'
assert module == 'nutrition_metrics'
assert kind == 'aggregated'
# ── Quality Filter Policy Tests ───────────────────────────────────────────────
def test_quality_filter_for_activity():
"""Activity placeholders need quality filter policies."""
policy = create_activity_quality_policy('activity_summary')
assert policy is not None
assert policy.enabled == True
assert policy.default_filter_level == "quality"
assert policy.null_quality_handling == "exclude"
assert policy.includes_poor == False
def test_quality_filter_not_for_non_activity():
"""Non-activity placeholders don't need quality filters."""
policy = create_activity_quality_policy('weight_aktuell')
assert policy is None
policy = create_activity_quality_policy('protein_avg')
assert policy is None
# ── Completeness Score Tests ──────────────────────────────────────────────────
def test_completeness_score_high():
"""High completeness score."""
metadata_dict = {
'category': 'Körper',
'description': 'Aktuelles Gewicht in kg',
'semantic_contract': 'Letzter verfügbarer Gewichtseintrag aus weight_log',
'source': {
'resolver': 'get_latest_weight',
'data_layer_module': 'body_metrics',
'source_tables': ['weight_log']
},
'type': 'atomic',
'time_window': 'latest',
'output_type': 'number',
'format_hint': '85.8 kg',
'quality_filter_policy': None,
'confidence_logic': {'supported': True}
}
score = calculate_completeness_score(metadata_dict)
assert score >= 80
def test_completeness_score_low():
"""Low completeness score."""
metadata_dict = {
'category': 'Unknown',
'description': '',
'semantic_contract': '',
'source': {'resolver': 'unknown'},
'type': 'legacy_unknown',
'time_window': 'unknown',
'output_type': 'unknown',
'format_hint': None
}
score = calculate_completeness_score(metadata_dict)
assert score < 50
# ── Integration Tests ─────────────────────────────────────────────────────────
def test_no_interpreted_without_provenance():
"""Interpreted type only for proven AI/prompt sources."""
# This would need to check actual metadata
# Placeholder for integration test
pass
def test_legacy_compatibility_maintained():
"""Legacy export format still works."""
# This would test that existing consumers still work
pass
# ── Run Tests ─────────────────────────────────────────────────────────────────
if __name__ == "__main__":
pytest.main([__file__, "-v"])

docs/PLACEHOLDER_METADATA_VALIDATION.md (new file)

@@ -0,0 +1,540 @@
# Placeholder Metadata Validation Logic
**Version:** 2.0.0
**Generated:** 2026-03-29
**Status:** Normative
---
## Purpose
This document defines the **deterministic derivation logic** for all placeholder metadata fields. It ensures that metadata extraction is **reproducible, testable, and auditable**.
---
## 1. Type Classification (`PlaceholderType`)
### Decision Logic
```python
def determine_type(key, description, output_type, value_display):
# JSON/Markdown outputs are typically raw_data
if output_type in [JSON, MARKDOWN]:
return RAW_DATA
# Scores and percentages are atomic
if any(x in key for x in ['score', 'pct', 'adequacy']):
return ATOMIC
# Summaries and details are raw_data
if any(x in key for x in ['summary', 'detail', 'verteilung']):
return RAW_DATA
# Goals and focus areas (if derived from prompts)
if any(x in key for x in ['goal', 'focus', 'top_']):
# Check if from KI/Prompt stage
if is_from_prompt_stage(key):
return INTERPRETED
else:
return ATOMIC # Just database values
# Correlations are interpreted
if 'correlation' in key or 'plateau' in key or 'driver' in key:
return INTERPRETED
# Default: atomic
return ATOMIC
```
### Rules
1. **ATOMIC**: Single values (numbers, strings, dates) from database or simple computation
2. **RAW_DATA**: Structured data (JSON, arrays, markdown) representing multiple values
3. **INTERPRETED**: Values derived from AI/Prompt stages or complex interpretation
4. **LEGACY_UNKNOWN**: Only for existing unclear placeholders (never for new ones)
### Validation
- `interpreted` requires evidence of prompt/stage origin
- Calculated scores/aggregations are NOT automatically `interpreted`
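A minimal runnable version of this decision tree; the prompt-stage check is stubbed with a static set, since the real lookup inspects the AI pipeline stages (assumption):

```python
# Runnable sketch of the type classification rules.
# PROMPT_STAGE_KEYS stubs is_from_prompt_stage() (assumption).
PROMPT_STAGE_KEYS = {"top_focus_areas"}

def determine_type(key, output_type):
    if output_type in ("json", "markdown"):
        return "raw_data"
    if any(x in key for x in ("score", "pct", "adequacy")):
        return "atomic"
    if any(x in key for x in ("summary", "detail", "verteilung")):
        return "raw_data"
    if any(x in key for x in ("goal", "focus", "top_")):
        return "interpreted" if key in PROMPT_STAGE_KEYS else "atomic"
    if any(x in key for x in ("correlation", "plateau", "driver")):
        return "interpreted"
    return "atomic"

assert determine_type("goal_progress_score", "integer") == "atomic"
assert determine_type("activity_summary", "markdown") == "raw_data"
assert determine_type("correlation_energy_weight", "number") == "interpreted"
assert determine_type("top_focus_areas", "string") == "interpreted"
```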
---
## 2. Unit Inference
### Decision Logic
```python
def infer_unit(key, description, output_type, type):
# NO units for:
if output_type in [JSON, MARKDOWN, ENUM]:
return None
if any(x in key for x in ['score', 'correlation', 'adequacy']):
return None # Dimensionless
if any(x in key for x in ['pct', 'ratio', 'balance']):
return None # Dimensionless percentage/ratio
# Weight/mass
if any(x in key for x in ['weight', 'gewicht', 'fm_', 'lbm_']):
return 'kg'
# Circumferences
if 'umfang' in key or any(x in key for x in ['waist', 'hip', 'chest']):
return 'cm'
# Time
if 'duration' in key or 'dauer' in key or 'debt' in key:
if 'hours' in description or 'stunden' in description:
return 'Stunden'
elif 'minutes' in description:
return 'Minuten'
return None # Unclear
# Heart rate
if 'rhr' in key or ('hr' in key and 'hrv' not in key):
return 'bpm'
# HRV
if 'hrv' in key:
return 'ms'
# VO2 Max
if 'vo2' in key:
return 'ml/kg/min'
# Calories
if 'kcal' in key or 'energy' in key:
return 'kcal'
# Macros
if any(x in key for x in ['protein', 'carb', 'fat']) and 'g' in description:
return 'g'
# Default: None (conservative)
return None
```
### Rules
1. **NO units** for dimensionless values (scores, correlations, percentages, ratios)
2. **NO units** for JSON/Markdown/Enum outputs
3. **NO units** for classifications (e.g., "recomposition_quadrant")
4. **Conservative**: Only assign unit if certain from key or description
### Examples
✅ **Correct:**
- `weight_aktuell``kg`
- `goal_progress_score``None` (dimensionless 0-100)
- `correlation_energy_weight_lag``None` (dimensionless)
- `activity_summary``None` (text/JSON)
❌ **Incorrect:**
- `goal_progress_score``%` (wrong - it's 0-100 dimensionless)
- `waist_hip_ratio` → any unit (wrong - dimensionless ratio)
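The rules above, condensed into a runnable sketch that reproduces the ✅ examples. Branch order matters: the dimensionless checks must run before the measurement checks, otherwise `waist_hip_ratio` would match the circumference rule:

```python
# Condensed, runnable sketch of the conservative unit rules.
def infer_unit(key, output_type):
    if output_type in ("json", "markdown", "enum"):
        return None
    if any(x in key for x in ("score", "correlation", "adequacy", "pct", "ratio", "balance")):
        return None                                   # dimensionless
    if any(x in key for x in ("weight", "gewicht", "fm_", "lbm_")):
        return "kg"
    if "umfang" in key or any(x in key for x in ("waist", "hip", "chest")):
        return "cm"
    if "hrv" in key:
        return "ms"
    if "rhr" in key or ("hr" in key and "hrv" not in key):
        return "bpm"
    if "vo2" in key:
        return "ml/kg/min"
    if "kcal" in key or "energy" in key:
        return "kcal"
    return None                                       # conservative default

assert infer_unit("weight_aktuell", "number") == "kg"
assert infer_unit("goal_progress_score", "integer") is None
assert infer_unit("waist_hip_ratio", "number") is None
assert infer_unit("active_goals_json", "json") is None
```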
---
## 3. Time Window Detection
### Decision Logic (Priority Order)
```python
def detect_time_window(key, description, semantic_contract, resolver_name):
    """Returns (time_window, certain, note)."""
    # 1. Explicit suffix (highest confidence)
    if '_7d' in key: return DAYS_7, True, None
    if '_28d' in key: return DAYS_28, True, None
    if '_30d' in key: return DAYS_30, True, None
    if '_90d' in key: return DAYS_90, True, None

    # 2. Latest/current keywords
    if any(x in key for x in ['aktuell', 'latest', 'current']):
        return LATEST, True, None

    # 3. Semantic contract (high confidence)
    if '7 tag' in semantic_contract or '7d' in semantic_contract:
        # Check for description mismatch -> legacy_contract_mismatch
        if '30' in description or '28' in description:
            return DAYS_7, True, "legacy mismatch: description disagrees with contract"
        return DAYS_7, True, None

    # 4. Description patterns (medium confidence)
    if 'letzte 7' in description or '7 tag' in description:
        return DAYS_7, False, None

    # 5. Heuristics (low confidence)
    if 'avg' in key or 'durchschn' in key:
        return DAYS_30, False, "Assumed 30d for average"
    if 'trend' in key:
        return DAYS_28, False, "Assumed 28d for trend"

    # 6. Unknown
    return UNKNOWN, False, "Could not determine"
```
### Legacy Mismatch Detection
If description says "7d" but semantic contract (implementation) says "28d":
- Set `time_window = DAYS_28` (actual implementation)
- Set `legacy_contract_mismatch = True`
- Add to `known_issues`: "Description says 7d but implementation is 28d"
### Rules
1. **Actual implementation** takes precedence over legacy description
2. **Suffix in key** is most reliable indicator
3. **Semantic contract** (if documented) reflects actual implementation
4. **Unknown** if cannot be determined with confidence
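A runnable sketch of the priority order, covering the suffix rule, latest-keywords, and the contract-vs-description mismatch; the regex patterns here are illustrative, not the production ones:

```python
import re

# Illustrative sketch of the priority order; production patterns may differ.
def detect_time_window(key, description="", contract=""):
    """Return (window, certain, note)."""
    m = re.search(r"_(7|28|30|90)d(_|$)", key)
    if m:                                            # 1. explicit suffix
        return f"{m.group(1)}d", True, None
    if any(x in key for x in ("aktuell", "latest", "current")):
        return "latest", True, None                  # 2. latest/current
    m = re.search(r"(7|28|30|90)\s*(?:tag|d)", contract.lower())
    if m:                                            # 3. semantic contract wins
        window = f"{m.group(1)}d"
        other = re.search(r"(7|28|30|90)", description)
        note = None
        if other and other.group(1) != m.group(1):
            note = f"Description says {other.group(1)}d but implementation is {window}"
        return window, True, note
    return "unknown", False, "Could not determine"

assert detect_time_window("weight_7d_median") == ("7d", True, None)
assert detect_time_window("weight_aktuell")[0] == "latest"
tw, certain, note = detect_time_window("weight_avg", "Durchschnitt 30 Tage", "Berechnet über 7 Tage")
assert tw == "7d" and note is not None
```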
---
## 4. Value Raw Extraction
### Decision Logic
```python
def extract_value_raw(value_display, output_type, type):
    """Returns (value_raw, success)."""
    # No value
    if value_display in ['nicht verfügbar', '', None]:
        return None, True

    # JSON output
    if output_type == JSON:
        try:
            return json.loads(value_display), True
        except (json.JSONDecodeError, TypeError):
            # Try to find JSON embedded in the string
            match = re.search(r'(\{.*\}|\[.*\])', value_display, re.DOTALL)
            if match:
                try:
                    return json.loads(match.group(1)), True
                except json.JSONDecodeError:
                    pass
            return None, False  # Failed

    # Markdown
    if output_type == MARKDOWN:
        return value_display, True  # Keep as string

    # Number
    if output_type in [NUMBER, INTEGER]:
        match = re.search(r'([-+]?\d+\.?\d*)', value_display)
        if match:
            val = float(match.group(1))
            return (int(val), True) if output_type == INTEGER else (val, True)
        return None, False

    # Date
    if output_type == DATE:
        if re.match(r'\d{4}-\d{2}-\d{2}', value_display):
            return value_display, True  # ISO format
        return value_display, False  # Unknown format

    # String/Enum
    return value_display, True
```
### Rules
1. **JSON outputs**: Must be valid JSON objects/arrays, not strings
2. **Numeric outputs**: Extract number without unit
3. **Markdown/String**: Keep as-is
4. **Dates**: Prefer ISO format (YYYY-MM-DD)
5. **Failure**: Set `value_raw = None` and mark in `unresolved_fields`
### Examples
✅ **Correct:**
- `active_goals_json` (JSON) → `{"goals": [...]}` (object)
- `weight_aktuell` (NUMBER) → `85.8` (number, no unit)
- `datum_heute` (DATE) → `"2026-03-29"` (ISO string)
❌ **Incorrect:**
- `active_goals_json` (JSON) → `"[Fehler: ...]"` (string, not JSON)
- `weight_aktuell` (NUMBER) → `"85.8"` (string, not number)
- `weight_aktuell` (NUMBER) → `85` (extracted from "85.8 kg" incorrectly)
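The ✅ examples, executed against a minimal extractor; this sketch omits the embedded-JSON fallback search described above:

```python
import json
import re

# Minimal extractor sketch; the embedded-JSON fallback from the spec is omitted.
def extract_value_raw(value_display, output_type):
    """Return (value_raw, success)."""
    if value_display in ("nicht verfügbar", "", None):
        return None, True
    if output_type == "json":
        try:
            return json.loads(value_display), True
        except (json.JSONDecodeError, TypeError):
            return None, False
    if output_type in ("number", "integer"):
        m = re.search(r"[-+]?\d+\.?\d*", value_display)
        if not m:
            return None, False
        val = float(m.group(0))
        return (int(val), True) if output_type == "integer" else (val, True)
    if output_type == "date":
        return value_display, bool(re.match(r"\d{4}-\d{2}-\d{2}$", value_display))
    return value_display, True  # string / markdown / enum

assert extract_value_raw('{"goals": []}', "json") == ({"goals": []}, True)
assert extract_value_raw("85.8 kg", "number") == (85.8, True)
assert extract_value_raw("42 Jahre", "integer") == (42, True)
assert extract_value_raw("[Fehler: kaputt]", "json") == (None, False)
```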
---
## 5. Source Provenance
### Decision Logic
```python
def resolve_source(resolver_name):
    """Returns (function, data_layer_module, tables, kind)."""
    # Skip safe wrappers - not real sources; caller marks field unresolved
    if resolver_name in ['_safe_int', '_safe_float', '_safe_json', '_safe_str']:
        return None, None, [], 'wrapper'

    # Known mappings
    if resolver_name in SOURCE_MAP:
        return SOURCE_MAP[resolver_name]  # (function, module, tables, kind)

    # Goals formatting
    if resolver_name.startswith('_format_goals'):
        return None, None, ['goals'], 'interpreted'

    # Unknown - caller marks field unresolved
    return None, None, [], 'unknown'
```
### Source Kinds
- **direct**: Direct database read (e.g., `get_latest_weight`)
- **computed**: Calculated from data (e.g., `calculate_bmi`)
- **aggregated**: Aggregation over time/records (e.g., `get_nutrition_avg`)
- **derived**: Derived from other metrics (e.g., `protein_g_per_kg`)
- **interpreted**: AI/prompt stage output
- **wrapper**: Safe wrapper (not a real source)
### Rules
1. **Safe wrappers** (`_safe_*`) are NOT valid source functions
2. Must trace to **real data layer function** or **database table**
3. Mark as `unresolved` if cannot trace to real source
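A runnable sketch of the wrapper-skip and map lookup; the `SOURCE_MAP` entries, the `*_data` function names, and the `nutrition_log` table are illustrative assumptions, not the production mapping:

```python
# SOURCE_MAP entries here are illustrative assumptions.
SAFE_WRAPPERS = {"_safe_int", "_safe_float", "_safe_json", "_safe_str"}
SOURCE_MAP = {
    "get_latest_weight": ("get_latest_weight_data", "body_metrics", ["weight_log"], "direct"),
    "calculate_bmi": ("calculate_bmi_data", "body_metrics", ["weight_log", "profiles"], "computed"),
    "get_nutrition_avg": ("get_nutrition_average_data", "nutrition_metrics", ["nutrition_log"], "aggregated"),
}

def resolve_source(resolver_name):
    """Return (function, data_layer_module, tables, kind)."""
    if resolver_name in SAFE_WRAPPERS:      # wrappers are never real sources
        return None, None, [], "wrapper"
    if resolver_name in SOURCE_MAP:
        return SOURCE_MAP[resolver_name]
    if resolver_name.startswith("_format_goals"):
        return None, None, ["goals"], "interpreted"
    return None, None, [], "unknown"        # caller marks the field unresolved

assert resolve_source("_safe_int") == (None, None, [], "wrapper")
assert resolve_source("get_latest_weight")[1] == "body_metrics"
assert resolve_source("mystery")[3] == "unknown"
```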
---
## 6. Used By Tracking
### Decision Logic
```python
def track_usage(placeholder_key, ai_prompts_table):
used_by = UsedBy(prompts=[], pipelines=[], charts=[])
for prompt in ai_prompts_table:
# Check template
if placeholder_key in prompt.template:
if prompt.type == 'pipeline':
used_by.pipelines.append(prompt.name)
else:
used_by.prompts.append(prompt.name)
# Check stages
for stage in prompt.stages:
for stage_prompt in stage.prompts:
if placeholder_key in stage_prompt.template:
used_by.pipelines.append(prompt.name)
# Check charts (future)
# if placeholder_key in chart_endpoints:
# used_by.charts.append(chart_name)
return used_by
```
### Orphaned Detection
If `used_by.prompts` + `used_by.pipelines` + `used_by.charts` are all empty:
- Set `orphaned_placeholder = True`
- Consider for deprecation
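The orphan rule reduces to one predicate over the `used_by` buckets; a minimal sketch:

```python
# A placeholder with no consumers in any bucket is orphaned.
def is_orphaned(used_by):
    return not (used_by.get("prompts") or used_by.get("pipelines") or used_by.get("charts"))

assert is_orphaned({"prompts": [], "pipelines": [], "charts": []})
assert not is_orphaned({"prompts": ["weekly_report"], "pipelines": [], "charts": []})
```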
---
## 7. Quality Filter Policy (Activity Placeholders)
### Decision Logic
```python
def create_quality_policy(key):
# Activity-related placeholders need quality policies
if any(x in key for x in ['activity', 'training', 'load', 'volume', 'ability']):
return QualityFilterPolicy(
enabled=True,
default_filter_level="quality", # quality | acceptable | all
null_quality_handling="exclude", # exclude | include_as_uncategorized
includes_poor=False,
includes_excluded=False,
notes="Filters for quality='quality' by default. NULL quality excluded."
)
return None
```
### Rules
1. **Activity metrics** require quality filter policies
2. **Default filter**: `quality='quality'` (acceptable and above)
3. **NULL handling**: Excluded by default
4. **Poor quality**: Not included unless explicit
5. **Excluded**: Not included
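A runnable sketch of the activity-only rule; the dataclass mirrors the `QualityFilterPolicy` fields described above:

```python
from dataclasses import dataclass
from typing import Optional

ACTIVITY_TOKENS = ("activity", "training", "load", "volume", "ability")

@dataclass
class QualityFilterPolicy:
    enabled: bool = True
    default_filter_level: str = "quality"       # quality | acceptable | all
    null_quality_handling: str = "exclude"      # exclude | include_as_uncategorized
    includes_poor: bool = False
    includes_excluded: bool = False

def create_quality_policy(key: str) -> Optional[QualityFilterPolicy]:
    """Only activity-related placeholders get a quality filter policy."""
    if any(x in key for x in ACTIVITY_TOKENS):
        return QualityFilterPolicy()
    return None

assert create_quality_policy("activity_summary").default_filter_level == "quality"
assert create_quality_policy("weight_aktuell") is None
```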
---
## 8. Confidence Logic
### Decision Logic
```python
def create_confidence_logic(key, data_layer_module):
# Data layer functions have confidence
if data_layer_module:
return ConfidenceLogic(
supported=True,
calculation="Based on data availability and thresholds",
thresholds={"min_data_points": 1},
notes=f"Determined by {data_layer_module}"
)
# Scores
if 'score' in key:
return ConfidenceLogic(
supported=True,
calculation="Based on data completeness for components",
notes="Correlates with input data availability"
)
# Correlations
if 'correlation' in key:
return ConfidenceLogic(
supported=True,
calculation="Pearson correlation with significance",
thresholds={"min_data_points": 7}
)
return None
```
### Rules
1. **Data layer placeholders**: Have confidence logic
2. **Scores**: Confidence correlates with data availability
3. **Correlations**: Require minimum data points
4. **Simple lookups**: May not need confidence logic
---
## 9. Metadata Completeness Score
### Calculation
```python
def calculate_completeness(metadata: dict) -> int:
    score = 0
    source = metadata.get('source', {})

    # Required fields (30 points)
    if metadata.get('category') != 'Unknown': score += 5
    if metadata.get('description') and 'No description' not in metadata['description']: score += 5
    if metadata.get('semantic_contract'): score += 10
    if source.get('resolver') != 'unknown': score += 10

    # Type specification (20 points)
    if metadata.get('type') != 'legacy_unknown': score += 10
    if metadata.get('time_window') != 'unknown': score += 10

    # Output specification (20 points)
    if metadata.get('output_type') != 'unknown': score += 10
    if metadata.get('format_hint'): score += 10

    # Source provenance (20 points)
    if source.get('data_layer_module'): score += 10
    if source.get('source_tables'): score += 10

    # Quality policies (10 points)
    if metadata.get('quality_filter_policy'): score += 5
    if metadata.get('confidence_logic'): score += 5

    return min(score, 100)
```
### Schema Status
Based on completeness score:
- **90-100%** + no unresolved → `validated`
- **50-89%**`draft`
- **0-49%**`incomplete`
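The score-to-status mapping can be sketched as follows; treating a high score that still has unresolved fields as `draft` is one reasonable reading, since the spec only defines the three bands:

```python
def schema_status(score, unresolved_fields):
    """Map a completeness score plus unresolved fields to a schema status."""
    if score >= 90 and not unresolved_fields:
        return "validated"
    if score >= 50:
        return "draft"
    return "incomplete"

assert schema_status(95, []) == "validated"
assert schema_status(95, ["value_raw"]) == "draft"   # unresolved blocks validation
assert schema_status(40, []) == "incomplete"
```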
---
## 10. Validation Tests
### Required Tests
```python
def test_value_raw_extraction():
    # Test each output_type; extract_value_raw returns (value, success)
    assert extract_value_raw('{"key": "val"}', JSON) == ({"key": "val"}, True)
    assert extract_value_raw('85.8 kg', NUMBER) == (85.8, True)
    assert extract_value_raw('2026-03-29', DATE) == ('2026-03-29', True)

def test_unit_inference():
    # No units for scores
    assert infer_unit('goal_progress_score', ..., NUMBER) is None
    # Correct units for measurements
    assert infer_unit('weight_aktuell', ..., NUMBER) == 'kg'
    # No units for JSON
    assert infer_unit('active_goals_json', ..., JSON) is None

def test_time_window_detection():
    # Explicit suffix
    assert detect_time_window('weight_7d_median', ...)[0] == DAYS_7
    # Latest
    assert detect_time_window('weight_aktuell', ...)[0] == LATEST
    # Legacy mismatch detection: implementation wins, mismatch is noted
    tw, certain, note = detect_time_window('weight_trend', desc='7d', contract='28d')
    assert tw == DAYS_28
    assert note is not None

def test_source_provenance():
    # Skip wrappers
    assert resolve_source('_safe_int') == (None, None, [], 'wrapper')
    # Real sources
    func, module, tables, kind = resolve_source('get_latest_weight')
    assert func == 'get_latest_weight_data'
    assert module == 'body_metrics'
    assert 'weight_log' in tables

def test_quality_filter_for_activity():
    # Activity placeholders need quality filter
    policy = create_quality_policy('activity_summary')
    assert policy is not None
    assert policy.default_filter_level == "quality"
    # Non-activity placeholders don't
    policy = create_quality_policy('weight_aktuell')
    assert policy is None
```
---
## 11. Continuous Validation
### Pre-Commit Checks
```bash
# Run validation before commit
python backend/generate_complete_metadata_v2.py

# Check for errors: fail the commit if the QA report shows a high failure rate
```
### CI/CD Integration
```yaml
- name: Validate Placeholder Metadata
run: |
python backend/generate_complete_metadata_v2.py
python backend/tests/test_placeholder_metadata_v2.py
```
---
## Summary
This validation logic ensures:
1. **Reproducible**: Same input → same output
2. **Testable**: All logic has unit tests
3. **Auditable**: Clear decision paths
4. **Conservative**: Prefer `unknown` over wrong guesses
5. **Normative**: Actual implementation > legacy description