""" Tests for Enhanced Placeholder Metadata System V2 Tests the strict quality controls and enhanced extraction logic. """ import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent)) import pytest from placeholder_metadata import ( PlaceholderType, TimeWindow, OutputType ) from placeholder_metadata_enhanced import ( extract_value_raw, infer_unit_strict, detect_time_window_precise, resolve_real_source, create_activity_quality_policy, calculate_completeness_score ) # ── Value Raw Extraction Tests ──────────────────────────────────────────────── def test_value_raw_json(): """JSON outputs must return actual JSON objects.""" # Valid JSON val, success = extract_value_raw('{"goals": [1,2,3]}', OutputType.JSON, PlaceholderType.RAW_DATA) assert success assert isinstance(val, dict) assert val == {"goals": [1,2,3]} # JSON array val, success = extract_value_raw('[1, 2, 3]', OutputType.JSON, PlaceholderType.RAW_DATA) assert success assert isinstance(val, list) # Invalid JSON val, success = extract_value_raw('not json', OutputType.JSON, PlaceholderType.RAW_DATA) assert not success assert val is None def test_value_raw_number(): """Numeric outputs must extract numbers without units.""" # Number with unit val, success = extract_value_raw('85.8 kg', OutputType.NUMBER, PlaceholderType.ATOMIC) assert success assert val == 85.8 # Integer val, success = extract_value_raw('42 Jahre', OutputType.INTEGER, PlaceholderType.ATOMIC) assert success assert val == 42 # Negative number val, success = extract_value_raw('-12.5 kg', OutputType.NUMBER, PlaceholderType.ATOMIC) assert success assert val == -12.5 # No number val, success = extract_value_raw('nicht verfügbar', OutputType.NUMBER, PlaceholderType.ATOMIC) assert not success def test_value_raw_markdown(): """Markdown outputs keep as string.""" val, success = extract_value_raw('# Heading\nText', OutputType.MARKDOWN, PlaceholderType.RAW_DATA) assert success assert val == '# Heading\nText' def test_value_raw_date(): """Date outputs prefer ISO format.""" # ISO format val, success = extract_value_raw('2026-03-29', OutputType.DATE, PlaceholderType.ATOMIC) assert success assert val == '2026-03-29' # Non-ISO (still accepts but marks as uncertain) val, success = extract_value_raw('29.03.2026', OutputType.DATE, PlaceholderType.ATOMIC) assert not success # Unknown format # ── Unit Inference Tests ────────────────────────────────────────────────────── def test_unit_no_units_for_scores(): """Scores are dimensionless (0-100 scale), no units.""" unit = infer_unit_strict('goal_progress_score', 'Progress score', OutputType.INTEGER, PlaceholderType.ATOMIC) assert unit is None unit = infer_unit_strict('protein_adequacy_28d', 'Protein adequacy', OutputType.INTEGER, PlaceholderType.ATOMIC) assert unit is None def test_unit_no_units_for_correlations(): """Correlations are dimensionless.""" unit = infer_unit_strict('correlation_energy_weight', 'Correlation', OutputType.JSON, PlaceholderType.INTERPRETED) assert unit is None def test_unit_no_units_for_ratios(): """Ratios and percentages are dimensionless.""" unit = infer_unit_strict('waist_hip_ratio', 'Waist-hip ratio', OutputType.NUMBER, PlaceholderType.ATOMIC) assert unit is None unit = infer_unit_strict('quality_sessions_pct', 'Quality sessions percentage', OutputType.INTEGER, PlaceholderType.ATOMIC) assert unit is None def test_unit_correct_units_for_measurements(): """Physical measurements have correct units.""" # Weight unit = infer_unit_strict('weight_aktuell', 'Aktuelles Gewicht', OutputType.NUMBER, PlaceholderType.ATOMIC) assert unit == 'kg' # Circumference unit = infer_unit_strict('waist_28d_delta', 'Taillenumfang', OutputType.NUMBER, PlaceholderType.ATOMIC) assert unit == 'cm' # Heart rate unit = infer_unit_strict('vitals_avg_hr', 'Ruhepuls', OutputType.INTEGER, PlaceholderType.ATOMIC) assert unit == 'bpm' # HRV unit = infer_unit_strict('vitals_avg_hrv', 'HRV', OutputType.NUMBER, PlaceholderType.ATOMIC) assert unit == 'ms' def test_unit_no_units_for_json(): """JSON outputs never have units.""" unit = infer_unit_strict('active_goals_json', 'Active goals', OutputType.JSON, PlaceholderType.RAW_DATA) assert unit is None # ── Time Window Detection Tests ─────────────────────────────────────────────── def test_time_window_explicit_suffix(): """Explicit suffixes are most reliable.""" tw, certain, mismatch = detect_time_window_precise('weight_7d_median', '', '', '') assert tw == TimeWindow.DAYS_7 assert certain == True tw, certain, mismatch = detect_time_window_precise('protein_avg_28d', '', '', '') assert tw == TimeWindow.DAYS_28 assert certain == True def test_time_window_latest(): """Latest/current keywords.""" tw, certain, mismatch = detect_time_window_precise('weight_aktuell', 'Aktuelles Gewicht', '', '') assert tw == TimeWindow.LATEST assert certain == True def test_time_window_from_contract(): """Time window from semantic contract.""" contract = 'Berechnet aus weight_log über 7 Tage' tw, certain, mismatch = detect_time_window_precise('weight_avg', '', '', contract) assert tw == TimeWindow.DAYS_7 assert certain == True def test_time_window_legacy_mismatch(): """Detect legacy description mismatch.""" description = 'Durchschnitt 30 Tage' contract = 'Berechnet über 7 Tage' tw, certain, mismatch = detect_time_window_precise('weight_avg', description, '', contract) assert tw == TimeWindow.DAYS_7 # Implementation wins assert mismatch is not None def test_time_window_unknown(): """Returns unknown if cannot determine.""" tw, certain, mismatch = detect_time_window_precise('some_metric', '', '', '') assert tw == TimeWindow.UNKNOWN assert certain == False # ── Source Provenance Tests ─────────────────────────────────────────────────── def test_source_skip_safe_wrappers(): """Safe wrappers are not real sources.""" func, module, tables, kind = resolve_real_source('_safe_int') assert func is None assert module is None assert kind == "wrapper" def test_source_real_data_layer(): """Real data layer sources.""" func, module, tables, kind = resolve_real_source('get_latest_weight') assert func == 'get_latest_weight_data' assert module == 'body_metrics' assert 'weight_log' in tables assert kind == 'direct' def test_source_computed(): """Computed sources.""" func, module, tables, kind = resolve_real_source('calculate_bmi') assert 'weight_log' in tables assert 'profiles' in tables assert kind == 'computed' def test_source_aggregated(): """Aggregated sources.""" func, module, tables, kind = resolve_real_source('get_nutrition_avg') assert func == 'get_nutrition_average_data' assert module == 'nutrition_metrics' assert kind == 'aggregated' # ── Quality Filter Policy Tests ─────────────────────────────────────────────── def test_quality_filter_for_activity(): """Activity placeholders need quality filter policies.""" policy = create_activity_quality_policy('activity_summary') assert policy is not None assert policy.enabled == True assert policy.default_filter_level == "quality" assert policy.null_quality_handling == "exclude" assert policy.includes_poor == False def test_quality_filter_not_for_non_activity(): """Non-activity placeholders don't need quality filters.""" policy = create_activity_quality_policy('weight_aktuell') assert policy is None policy = create_activity_quality_policy('protein_avg') assert policy is None # ── Completeness Score Tests ────────────────────────────────────────────────── def test_completeness_score_high(): """High completeness score.""" metadata_dict = { 'category': 'Körper', 'description': 'Aktuelles Gewicht in kg', 'semantic_contract': 'Letzter verfügbarer Gewichtseintrag aus weight_log', 'source': { 'resolver': 'get_latest_weight', 'data_layer_module': 'body_metrics', 'source_tables': ['weight_log'] }, 'type': 'atomic', 'time_window': 'latest', 'output_type': 'number', 'format_hint': '85.8 kg', 'quality_filter_policy': None, 'confidence_logic': {'supported': True} } score = calculate_completeness_score(metadata_dict) assert score >= 80 def test_completeness_score_low(): """Low completeness score.""" metadata_dict = { 'category': 'Unknown', 'description': '', 'semantic_contract': '', 'source': {'resolver': 'unknown'}, 'type': 'legacy_unknown', 'time_window': 'unknown', 'output_type': 'unknown', 'format_hint': None } score = calculate_completeness_score(metadata_dict) assert score < 50 # ── Integration Tests ───────────────────────────────────────────────────────── def test_no_interpreted_without_provenance(): """Interpreted type only for proven AI/prompt sources.""" # This would need to check actual metadata # Placeholder for integration test pass def test_legacy_compatibility_maintained(): """Legacy export format still works.""" # This would test that existing consumers still work pass # ── Run Tests ───────────────────────────────────────────────────────────────── if __name__ == "__main__": pytest.main([__file__, "-v"])