mitai-jinkendo/backend/tests/test_placeholder_metadata_v2.py
Lars 650313347f
All checks were successful
Deploy Development / deploy (push) Successful in 54s
Build Test / lint-backend (push) Successful in 1s
Build Test / build-frontend (push) Successful in 15s
feat: Placeholder Metadata V2 - Normative Implementation + ZIP Export Fix
MAJOR CHANGES:
- Enhanced metadata schema with 7 QA fields
- Deterministic derivation logic (no guessing)
- Conservative inference (prefer unknown over wrong)
- Real source tracking (skip safe wrappers)
- Legacy mismatch detection
- Activity quality filter policies
- Completeness scoring (0-100)
- Unresolved fields tracking
- Fixed ZIP/JSON export auth (query param support)

FILES CHANGED:
- backend/placeholder_metadata.py (schema extended)
- backend/placeholder_metadata_enhanced.py (NEW, 418 lines)
- backend/generate_complete_metadata_v2.py (NEW, 334 lines)
- backend/tests/test_placeholder_metadata_v2.py (NEW, 302 lines)
- backend/routers/prompts.py (V2 integration + auth fix)
- docs/PLACEHOLDER_METADATA_VALIDATION.md (NEW, 541 lines)

PROBLEMS FIXED:
✓ value_raw extraction (type-aware, JSON parsing)
✓ Units for dimensionless values (scores, correlations)
✓ Safe wrappers as sources (now skipped)
✓ Time window guessing (confidence flags)
✓ Legacy inconsistencies (marked with flag)
✓ Missing quality filters (activity placeholders)
✓ No completeness metric (0-100 score)
✓ Orphaned placeholders (tracked)
✓ Unresolved fields (explicit list)
✓ ZIP/JSON export auth (query token support for downloads)

AUTH FIX:
- export-catalog-zip now accepts token via query param (?token=xxx)
- export-values-extended now accepts token via query param
- Allows browser downloads without custom headers

Konzept: docs/PLACEHOLDER_METADATA_REQUIREMENTS_V2_NORMATIVE.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-29 21:23:37 +02:00

302 lines
10 KiB
Python

"""
Tests for Enhanced Placeholder Metadata System V2
Tests the strict quality controls and enhanced extraction logic.
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
import pytest
from placeholder_metadata import (
PlaceholderType,
TimeWindow,
OutputType
)
from placeholder_metadata_enhanced import (
extract_value_raw,
infer_unit_strict,
detect_time_window_precise,
resolve_real_source,
create_activity_quality_policy,
calculate_completeness_score
)
# ── Value Raw Extraction Tests ────────────────────────────────────────────────
def test_value_raw_json():
    """JSON-typed raw values must come back as parsed Python containers.

    Exercises three inputs in turn: an object literal, an array literal and
    a string that is not JSON. For the last one the test expects the success
    flag to be false and the value to be ``None``.
    """
    # Object literal -> dict with identical content.
    parsed, ok = extract_value_raw(
        '{"goals": [1,2,3]}', OutputType.JSON, PlaceholderType.RAW_DATA
    )
    assert ok
    assert isinstance(parsed, dict)
    assert parsed == {"goals": [1, 2, 3]}

    # Array literal -> list.
    parsed, ok = extract_value_raw(
        '[1, 2, 3]', OutputType.JSON, PlaceholderType.RAW_DATA
    )
    assert ok
    assert isinstance(parsed, list)

    # Unparseable text -> failure flag and no value.
    parsed, ok = extract_value_raw(
        'not json', OutputType.JSON, PlaceholderType.RAW_DATA
    )
    assert not ok
    assert parsed is None
def test_value_raw_number():
    """Numeric raw values must be extracted without their trailing unit text.

    The positive cases cover a decimal with unit, an integer with unit and a
    negative decimal; the final case feeds text without any number and
    expects the success flag to be false.
    """
    # (rendered text, declared output type, expected numeric value)
    numeric_cases = (
        ('85.8 kg', OutputType.NUMBER, 85.8),
        ('42 Jahre', OutputType.INTEGER, 42),
        ('-12.5 kg', OutputType.NUMBER, -12.5),
    )
    for rendered, declared_type, expected in numeric_cases:
        number, ok = extract_value_raw(rendered, declared_type, PlaceholderType.ATOMIC)
        assert ok
        assert number == expected

    # Text containing no number at all must be reported as a failure.
    _unused, ok = extract_value_raw(
        'nicht verfügbar', OutputType.NUMBER, PlaceholderType.ATOMIC
    )
    assert not ok
def test_value_raw_markdown():
    """Markdown-typed raw values are expected back unchanged, as a string."""
    markdown_text = '# Heading\nText'
    raw, ok = extract_value_raw(
        markdown_text, OutputType.MARKDOWN, PlaceholderType.RAW_DATA
    )
    assert ok
    assert raw == '# Heading\nText'
def test_value_raw_date():
    """Date-typed raw values: ISO input succeeds, non-ISO input does not."""
    # ISO 8601 calendar date is expected to pass through verbatim.
    iso_value, iso_ok = extract_value_raw(
        '2026-03-29', OutputType.DATE, PlaceholderType.ATOMIC
    )
    assert iso_ok
    assert iso_value == '2026-03-29'

    # dd.mm.yyyy is treated as an unknown format: the success flag must be
    # false. The returned value itself is deliberately not asserted here.
    _other_value, other_ok = extract_value_raw(
        '29.03.2026', OutputType.DATE, PlaceholderType.ATOMIC
    )
    assert not other_ok
# ── Unit Inference Tests ──────────────────────────────────────────────────────
def test_unit_no_units_for_scores():
    """Score-like placeholders must not be assigned a unit.

    Both keys are checked as integer-typed atomic placeholders; the inferred
    unit has to be ``None`` for each of them.
    """
    score_placeholders = (
        ('goal_progress_score', 'Progress score'),
        ('protein_adequacy_28d', 'Protein adequacy'),
    )
    for key, description in score_placeholders:
        inferred = infer_unit_strict(
            key, description, OutputType.INTEGER, PlaceholderType.ATOMIC
        )
        assert inferred is None
def test_unit_no_units_for_correlations():
    """A correlation placeholder must not be assigned a unit."""
    inferred = infer_unit_strict(
        'correlation_energy_weight',
        'Correlation',
        OutputType.JSON,
        PlaceholderType.INTERPRETED,
    )
    assert inferred is None
def test_unit_no_units_for_ratios():
    """Ratio and percentage placeholders must not be assigned a unit."""
    # (placeholder key, description, declared output type)
    dimensionless_cases = (
        ('waist_hip_ratio', 'Waist-hip ratio', OutputType.NUMBER),
        ('quality_sessions_pct', 'Quality sessions percentage', OutputType.INTEGER),
    )
    for key, description, declared_type in dimensionless_cases:
        inferred = infer_unit_strict(
            key, description, declared_type, PlaceholderType.ATOMIC
        )
        assert inferred is None
def test_unit_correct_units_for_measurements():
    """Physical measurements must be assigned their expected unit.

    Covers weight (kg), circumference (cm), heart rate (bpm) and HRV (ms),
    each as an atomic placeholder.
    """
    # (placeholder key, description, declared output type, expected unit)
    measurement_cases = (
        ('weight_aktuell', 'Aktuelles Gewicht', OutputType.NUMBER, 'kg'),
        ('waist_28d_delta', 'Taillenumfang', OutputType.NUMBER, 'cm'),
        ('vitals_avg_hr', 'Ruhepuls', OutputType.INTEGER, 'bpm'),
        ('vitals_avg_hrv', 'HRV', OutputType.NUMBER, 'ms'),
    )
    for key, description, declared_type, expected_unit in measurement_cases:
        inferred = infer_unit_strict(
            key, description, declared_type, PlaceholderType.ATOMIC
        )
        assert inferred == expected_unit
def test_unit_no_units_for_json():
    """A JSON-typed placeholder must not be assigned a unit."""
    inferred = infer_unit_strict(
        'active_goals_json',
        'Active goals',
        OutputType.JSON,
        PlaceholderType.RAW_DATA,
    )
    assert inferred is None
# ── Time Window Detection Tests ───────────────────────────────────────────────
def test_time_window_explicit_suffix():
    """An explicit ``7d`` / ``28d`` token in the key fixes the time window.

    Only the key is supplied (the three remaining arguments are empty
    strings); the detected window must match and be flagged as certain.
    """
    suffix_cases = (
        ('weight_7d_median', TimeWindow.DAYS_7),
        ('protein_avg_28d', TimeWindow.DAYS_28),
    )
    for key, expected_window in suffix_cases:
        window, is_certain, _mismatch = detect_time_window_precise(key, '', '', '')
        assert window == expected_window
        assert is_certain == True
def test_time_window_latest():
    """A 'current value' placeholder must resolve to the LATEST window."""
    window, is_certain, _mismatch = detect_time_window_precise(
        'weight_aktuell', 'Aktuelles Gewicht', '', ''
    )
    assert window == TimeWindow.LATEST
    assert is_certain == True
def test_time_window_from_contract():
    """The time window can be taken from the semantic contract text.

    Key and description carry no window hint here; only the contract string
    (fourth argument) mentions seven days.
    """
    semantic_contract = 'Berechnet aus weight_log über 7 Tage'
    window, is_certain, _mismatch = detect_time_window_precise(
        'weight_avg', '', '', semantic_contract
    )
    assert window == TimeWindow.DAYS_7
    assert is_certain == True
def test_time_window_legacy_mismatch():
    """A description that disagrees with the contract must be flagged.

    The description says 30 days while the contract says 7 days. The test
    expects the contract's window to win and a non-``None`` mismatch marker.
    """
    legacy_description = 'Durchschnitt 30 Tage'
    semantic_contract = 'Berechnet über 7 Tage'
    window, _is_certain, mismatch_note = detect_time_window_precise(
        'weight_avg', legacy_description, '', semantic_contract
    )
    # The implementation-side statement (contract) takes precedence.
    assert window == TimeWindow.DAYS_7
    assert mismatch_note is not None
def test_time_window_unknown():
    """Without any hint the window must be UNKNOWN and flagged uncertain."""
    window, is_certain, _mismatch = detect_time_window_precise(
        'some_metric', '', '', ''
    )
    assert window == TimeWindow.UNKNOWN
    assert is_certain == False
# ── Source Provenance Tests ───────────────────────────────────────────────────
def test_source_skip_safe_wrappers():
    """A ``_safe_*`` wrapper must not be reported as a real data source.

    The test expects no function and no module to be resolved, and the
    source kind to be ``"wrapper"``.
    """
    resolved_func, resolved_module, _tables, source_kind = resolve_real_source('_safe_int')
    assert resolved_func is None
    assert resolved_module is None
    assert source_kind == "wrapper"
def test_source_real_data_layer():
    """A direct resolver must map to its data-layer function, module and table."""
    resolved = resolve_real_source('get_latest_weight')
    resolved_func, resolved_module, source_tables, source_kind = resolved
    assert resolved_func == 'get_latest_weight_data'
    assert resolved_module == 'body_metrics'
    assert 'weight_log' in source_tables
    assert source_kind == 'direct'
def test_source_computed():
    """A computed resolver must list every table it draws on.

    Only the table list and the source kind are asserted; the resolved
    function and module names are not checked by this test.
    """
    _func, _module, source_tables, source_kind = resolve_real_source('calculate_bmi')
    assert 'weight_log' in source_tables
    assert 'profiles' in source_tables
    assert source_kind == 'computed'
def test_source_aggregated():
    """An aggregating resolver must be classified as ``'aggregated'``."""
    resolved = resolve_real_source('get_nutrition_avg')
    resolved_func, resolved_module, _tables, source_kind = resolved
    assert resolved_func == 'get_nutrition_average_data'
    assert resolved_module == 'nutrition_metrics'
    assert source_kind == 'aggregated'
# ── Quality Filter Policy Tests ───────────────────────────────────────────────
def test_quality_filter_for_activity():
    """An activity placeholder must receive a quality filter policy.

    Checks the policy exists and pins the four attribute values this test
    relies on.
    """
    activity_policy = create_activity_quality_policy('activity_summary')
    assert activity_policy is not None
    assert activity_policy.enabled == True
    assert activity_policy.default_filter_level == "quality"
    assert activity_policy.null_quality_handling == "exclude"
    assert activity_policy.includes_poor == False
def test_quality_filter_not_for_non_activity():
    """Placeholders outside the activity domain must get no policy at all."""
    for non_activity_key in ('weight_aktuell', 'protein_avg'):
        assert create_activity_quality_policy(non_activity_key) is None
# ── Completeness Score Tests ──────────────────────────────────────────────────
def test_completeness_score_high():
    """A fully described placeholder must score at least 80."""
    # Provenance section, built separately to keep the main literal flat.
    source_info = {
        'resolver': 'get_latest_weight',
        'data_layer_module': 'body_metrics',
        'source_tables': ['weight_log'],
    }
    well_documented = {
        'category': 'Körper',
        'description': 'Aktuelles Gewicht in kg',
        'semantic_contract': 'Letzter verfügbarer Gewichtseintrag aus weight_log',
        'source': source_info,
        'type': 'atomic',
        'time_window': 'latest',
        'output_type': 'number',
        'format_hint': '85.8 kg',
        'quality_filter_policy': None,
        'confidence_logic': {'supported': True},
    }
    assert calculate_completeness_score(well_documented) >= 80
def test_completeness_score_low():
    """A placeholder with mostly empty/unknown fields must score below 50."""
    barely_documented = dict(
        category='Unknown',
        description='',
        semantic_contract='',
        source={'resolver': 'unknown'},
        type='legacy_unknown',
        time_window='unknown',
        output_type='unknown',
        format_hint=None,
    )
    assert calculate_completeness_score(barely_documented) < 50
# ── Integration Tests ─────────────────────────────────────────────────────────
def test_no_interpreted_without_provenance():
    """Interpreted type only for proven AI/prompt sources.

    Not implemented yet: the check needs the actual placeholder metadata.
    The test is skipped explicitly so the report shows it as pending
    instead of counting an empty body as a pass.
    """
    # TODO: inspect the actual metadata and assert that only placeholders
    # with proven AI/prompt provenance carry the interpreted type.
    pytest.skip("integration test not implemented: needs actual placeholder metadata")
def test_legacy_compatibility_maintained():
    """Legacy export format still works.

    Not implemented yet: the check has to exercise the existing consumers of
    the legacy export. The test is skipped explicitly so the report shows it
    as pending instead of counting an empty body as a pass.
    """
    # TODO: run the legacy export and verify existing consumers still work.
    pytest.skip("integration test not implemented: legacy export consumers not exercised")
# ── Run Tests ─────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Hand pytest's exit status to the shell so that running this file
    # directly fails (non-zero exit) when any test fails.
    sys.exit(pytest.main([__file__, "-v"]))