MAJOR CHANGES:
- Enhanced metadata schema with 7 QA fields
- Deterministic derivation logic (no guessing)
- Conservative inference (prefer unknown over wrong)
- Real source tracking (skip safe wrappers)
- Legacy mismatch detection
- Activity quality filter policies
- Completeness scoring (0-100)
- Unresolved fields tracking
- Fixed ZIP/JSON export auth (query param support)

FILES CHANGED:
- backend/placeholder_metadata.py (schema extended)
- backend/placeholder_metadata_enhanced.py (NEW, 418 lines)
- backend/generate_complete_metadata_v2.py (NEW, 334 lines)
- backend/tests/test_placeholder_metadata_v2.py (NEW, 302 lines)
- backend/routers/prompts.py (V2 integration + auth fix)
- docs/PLACEHOLDER_METADATA_VALIDATION.md (NEW, 541 lines)

PROBLEMS FIXED:
✓ value_raw extraction (type-aware, JSON parsing)
✓ Units for dimensionless values (scores, correlations)
✓ Safe wrappers as sources (now skipped)
✓ Time window guessing (confidence flags)
✓ Legacy inconsistencies (marked with flag)
✓ Missing quality filters (activity placeholders)
✓ No completeness metric (0-100 score)
✓ Orphaned placeholders (tracked)
✓ Unresolved fields (explicit list)
✓ ZIP/JSON export auth (query token support for downloads)

AUTH FIX:
- export-catalog-zip now accepts token via query param (?token=xxx)
- export-values-extended now accepts token via query param
- Allows browser downloads without custom headers

Concept: docs/PLACEHOLDER_METADATA_REQUIREMENTS_V2_NORMATIVE.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
302 lines
10 KiB
Python
302 lines
10 KiB
Python
"""
|
|
Tests for Enhanced Placeholder Metadata System V2
|
|
|
|
Tests the strict quality controls and enhanced extraction logic.
|
|
"""
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
import pytest
|
|
from placeholder_metadata import (
|
|
PlaceholderType,
|
|
TimeWindow,
|
|
OutputType
|
|
)
|
|
from placeholder_metadata_enhanced import (
|
|
extract_value_raw,
|
|
infer_unit_strict,
|
|
detect_time_window_precise,
|
|
resolve_real_source,
|
|
create_activity_quality_policy,
|
|
calculate_completeness_score
|
|
)
|
|
|
|
|
|
# ── Value Raw Extraction Tests ────────────────────────────────────────────────
|
|
|
|
def test_value_raw_json():
    """Check JSON extraction: objects and arrays are parsed, non-JSON text is rejected."""
    json_type = OutputType.JSON
    raw_kind = PlaceholderType.RAW_DATA

    # A JSON object string must come back as an equal dict.
    parsed, ok = extract_value_raw('{"goals": [1,2,3]}', json_type, raw_kind)
    assert ok
    assert isinstance(parsed, dict)
    assert parsed == {"goals": [1,2,3]}

    # A JSON array string must come back as a list.
    parsed, ok = extract_value_raw('[1, 2, 3]', json_type, raw_kind)
    assert ok
    assert isinstance(parsed, list)

    # Text that is not JSON yields a failure flag and no value.
    parsed, ok = extract_value_raw('not json', json_type, raw_kind)
    assert not ok
    assert parsed is None
|
|
|
|
|
|
def test_value_raw_number():
    """Check numeric extraction: the bare number is returned without its unit text."""
    # Each row: (raw text, requested output type, expected numeric value).
    parsable_cases = (
        ('85.8 kg', OutputType.NUMBER, 85.8),    # decimal followed by a unit
        ('42 Jahre', OutputType.INTEGER, 42),     # integer followed by a unit
        ('-12.5 kg', OutputType.NUMBER, -12.5),   # negative decimal
    )
    for raw_text, output_type, expected in parsable_cases:
        number, ok = extract_value_raw(raw_text, output_type, PlaceholderType.ATOMIC)
        assert ok
        assert number == expected

    # Text containing no number must be reported as a failed extraction.
    _unused, ok = extract_value_raw('nicht verfügbar', OutputType.NUMBER, PlaceholderType.ATOMIC)
    assert not ok
|
|
|
|
|
|
def test_value_raw_markdown():
    """Check that Markdown text is returned unchanged as a string."""
    markdown_text = '# Heading\nText'
    result = extract_value_raw(markdown_text, OutputType.MARKDOWN, PlaceholderType.RAW_DATA)
    extracted, ok = result
    assert ok
    assert extracted == '# Heading\nText'
|
|
|
|
|
|
def test_value_raw_date():
    """Check date extraction: an ISO date passes through, a dotted date is not accepted."""
    # ISO 'YYYY-MM-DD' input is returned as the same string.
    iso_value, iso_ok = extract_value_raw('2026-03-29', OutputType.DATE, PlaceholderType.ATOMIC)
    assert iso_ok
    assert iso_value == '2026-03-29'

    # A 'DD.MM.YYYY' input is expected to be reported as unsuccessful.
    _dotted_value, dotted_ok = extract_value_raw('29.03.2026', OutputType.DATE, PlaceholderType.ATOMIC)
    assert not dotted_ok  # Unknown format
|
|
|
|
|
|
# ── Unit Inference Tests ──────────────────────────────────────────────────────
|
|
|
|
def test_unit_no_units_for_scores():
    """Check that score-like placeholders are inferred to have no unit."""
    # Each row: (placeholder key, description).
    score_placeholders = (
        ('goal_progress_score', 'Progress score'),
        ('protein_adequacy_28d', 'Protein adequacy'),
    )
    for key, description in score_placeholders:
        inferred = infer_unit_strict(key, description, OutputType.INTEGER, PlaceholderType.ATOMIC)
        assert inferred is None
|
|
|
|
|
|
def test_unit_no_units_for_correlations():
    """Check that a correlation placeholder is inferred to have no unit."""
    inferred = infer_unit_strict(
        'correlation_energy_weight',
        'Correlation',
        OutputType.JSON,
        PlaceholderType.INTERPRETED,
    )
    assert inferred is None
|
|
|
|
|
|
def test_unit_no_units_for_ratios():
    """Check that ratio and percentage placeholders are inferred to have no unit."""
    # Each row: (placeholder key, description, output type).
    dimensionless_cases = (
        ('waist_hip_ratio', 'Waist-hip ratio', OutputType.NUMBER),
        ('quality_sessions_pct', 'Quality sessions percentage', OutputType.INTEGER),
    )
    for key, description, output_type in dimensionless_cases:
        inferred = infer_unit_strict(key, description, output_type, PlaceholderType.ATOMIC)
        assert inferred is None
|
|
|
|
|
|
def test_unit_correct_units_for_measurements():
    """Check the unit inferred for weight, circumference, heart-rate and HRV placeholders."""
    # Each row: (placeholder key, description, output type, expected unit).
    measurement_cases = (
        ('weight_aktuell', 'Aktuelles Gewicht', OutputType.NUMBER, 'kg'),   # weight
        ('waist_28d_delta', 'Taillenumfang', OutputType.NUMBER, 'cm'),       # circumference
        ('vitals_avg_hr', 'Ruhepuls', OutputType.INTEGER, 'bpm'),            # heart rate
        ('vitals_avg_hrv', 'HRV', OutputType.NUMBER, 'ms'),                  # HRV
    )
    for key, description, output_type, expected_unit in measurement_cases:
        inferred = infer_unit_strict(key, description, output_type, PlaceholderType.ATOMIC)
        assert inferred == expected_unit
|
|
|
|
|
|
def test_unit_no_units_for_json():
    """Check that a JSON-typed placeholder is inferred to have no unit."""
    inferred = infer_unit_strict(
        'active_goals_json',
        'Active goals',
        OutputType.JSON,
        PlaceholderType.RAW_DATA,
    )
    assert inferred is None
|
|
|
|
|
|
# ── Time Window Detection Tests ───────────────────────────────────────────────
|
|
|
|
def test_time_window_explicit_suffix():
    """Check that '7d' / '28d' markers in the key give a certain time window."""
    # Each row: (placeholder key, expected time window).
    suffix_cases = (
        ('weight_7d_median', TimeWindow.DAYS_7),
        ('protein_avg_28d', TimeWindow.DAYS_28),
    )
    for key, expected_window in suffix_cases:
        window, is_certain, _mismatch = detect_time_window_precise(key, '', '', '')
        assert window == expected_window
        # Equality comparison kept on purpose: this is the original check.
        assert is_certain == True
|
|
|
|
|
|
def test_time_window_latest():
    """Check that 'aktuell' in key/description resolves to the LATEST window with certainty."""
    detection = detect_time_window_precise('weight_aktuell', 'Aktuelles Gewicht', '', '')
    window, is_certain, _mismatch = detection
    assert window == TimeWindow.LATEST
    assert is_certain == True
|
|
|
|
|
|
def test_time_window_from_contract():
    """Check that a '7 Tage' mention in the semantic contract gives a certain 7-day window."""
    semantic_contract = 'Berechnet aus weight_log über 7 Tage'
    detection = detect_time_window_precise('weight_avg', '', '', semantic_contract)
    window, is_certain, _mismatch = detection
    assert window == TimeWindow.DAYS_7
    assert is_certain == True
|
|
|
|
|
|
def test_time_window_legacy_mismatch():
    """Check that a description/contract disagreement is reported and the contract's window is used."""
    legacy_description = 'Durchschnitt 30 Tage'
    semantic_contract = 'Berechnet über 7 Tage'

    detection = detect_time_window_precise('weight_avg', legacy_description, '', semantic_contract)
    window, _is_certain, mismatch_note = detection

    assert window == TimeWindow.DAYS_7  # Implementation wins
    # The 30-vs-7-day disagreement must be surfaced rather than dropped.
    assert mismatch_note is not None
|
|
|
|
|
|
def test_time_window_unknown():
    """Check that a key with no time hints yields UNKNOWN and is not marked certain."""
    detection = detect_time_window_precise('some_metric', '', '', '')
    window, is_certain, _mismatch = detection
    assert window == TimeWindow.UNKNOWN
    assert is_certain == False
|
|
|
|
|
|
# ── Source Provenance Tests ───────────────────────────────────────────────────
|
|
|
|
def test_source_skip_safe_wrappers():
    """Check that '_safe_int' resolves to kind 'wrapper' with no function or module."""
    resolved = resolve_real_source('_safe_int')
    func_name, module_name, _tables, source_kind = resolved
    assert func_name is None
    assert module_name is None
    assert source_kind == "wrapper"
|
|
|
|
|
|
def test_source_real_data_layer():
    """Check the function, module, table and kind resolved for 'get_latest_weight'."""
    resolved = resolve_real_source('get_latest_weight')
    func_name, module_name, source_tables, source_kind = resolved
    assert func_name == 'get_latest_weight_data'
    assert module_name == 'body_metrics'
    assert 'weight_log' in source_tables
    assert source_kind == 'direct'
|
|
|
|
|
|
def test_source_computed():
    """Check that 'calculate_bmi' resolves to kind 'computed' over weight_log and profiles."""
    resolved = resolve_real_source('calculate_bmi')
    # Function and module names are not asserted for this resolver.
    _func_name, _module_name, source_tables, source_kind = resolved
    assert 'weight_log' in source_tables
    assert 'profiles' in source_tables
    assert source_kind == 'computed'
|
|
|
|
|
|
def test_source_aggregated():
    """Check the function, module and kind resolved for 'get_nutrition_avg'."""
    resolved = resolve_real_source('get_nutrition_avg')
    func_name, module_name, _tables, source_kind = resolved
    assert func_name == 'get_nutrition_average_data'
    assert module_name == 'nutrition_metrics'
    assert source_kind == 'aggregated'
|
|
|
|
|
|
# ── Quality Filter Policy Tests ───────────────────────────────────────────────
|
|
|
|
def test_quality_filter_for_activity():
    """Check the policy fields created for the 'activity_summary' placeholder."""
    activity_policy = create_activity_quality_policy('activity_summary')

    # A policy object must exist before its fields can be inspected.
    assert activity_policy is not None
    assert activity_policy.enabled == True
    assert activity_policy.default_filter_level == "quality"
    assert activity_policy.null_quality_handling == "exclude"
    assert activity_policy.includes_poor == False
|
|
|
|
|
|
def test_quality_filter_not_for_non_activity():
    """Check that non-activity placeholder keys get no quality filter policy."""
    for non_activity_key in ('weight_aktuell', 'protein_avg'):
        assert create_activity_quality_policy(non_activity_key) is None
|
|
|
|
|
|
# ── Completeness Score Tests ──────────────────────────────────────────────────
|
|
|
|
def test_completeness_score_high():
    """Check that fully populated metadata scores at least 80."""
    # Source section with resolver, module and tables all filled in.
    source_info = {
        'resolver': 'get_latest_weight',
        'data_layer_module': 'body_metrics',
        'source_tables': ['weight_log'],
    }
    well_documented = {
        'category': 'Körper',
        'description': 'Aktuelles Gewicht in kg',
        'semantic_contract': 'Letzter verfügbarer Gewichtseintrag aus weight_log',
        'source': source_info,
        'type': 'atomic',
        'time_window': 'latest',
        'output_type': 'number',
        'format_hint': '85.8 kg',
        'quality_filter_policy': None,
        'confidence_logic': {'supported': True},
    }

    assert calculate_completeness_score(well_documented) >= 80
|
|
|
|
|
|
def test_completeness_score_low():
    """Check that metadata made of empty and 'unknown' values scores below 50."""
    sparse_metadata = {
        'category': 'Unknown',
        'description': '',
        'semantic_contract': '',
        'source': {'resolver': 'unknown'},
        'type': 'legacy_unknown',
        'time_window': 'unknown',
        'output_type': 'unknown',
        'format_hint': None,
    }

    assert calculate_completeness_score(sparse_metadata) < 50
|
|
|
|
|
|
# ── Integration Tests ─────────────────────────────────────────────────────────
|
|
|
|
def test_no_interpreted_without_provenance():
    """Placeholder: the interpreted type is meant only for proven AI/prompt sources.

    No assertions yet; a real check would need to inspect actual metadata.
    """
    # TODO: implement as an integration test against real metadata.
    return None
|
|
|
|
|
|
def test_legacy_compatibility_maintained():
    """Placeholder: the legacy export format is meant to keep working.

    No assertions yet; a real check would exercise existing consumers.
    """
    # TODO: implement once a legacy-consumer fixture is available.
    return None
|
|
|
|
|
|
# ── Run Tests ─────────────────────────────────────────────────────────────────
|
|
|
|
if __name__ == "__main__":
    # Run this file's tests verbosely when executed as a script.
    # pytest.main() returns the run's exit code; raise it as SystemExit so a
    # failing run produces a non-zero process status instead of always 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|