# mitai-jinkendo/backend/tests/test_placeholder_metadata_v2.py

"""
Tests for Enhanced Placeholder Metadata System V2

Tests the strict quality controls and enhanced extraction logic.
"""
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent))

import pytest
from placeholder_metadata import (
    PlaceholderType,
    TimeWindow,
    OutputType
)
from placeholder_metadata_enhanced import (
    extract_value_raw,
    infer_unit_strict,
    detect_time_window_precise,
    resolve_real_source,
    create_activity_quality_policy,
    calculate_completeness_score
)


# ── Value Raw Extraction Tests ────────────────────────────────────────────────

def test_value_raw_json():
    """Check JSON extraction: valid text parses to native objects, invalid text fails."""
    json_kind = (OutputType.JSON, PlaceholderType.RAW_DATA)

    # A JSON object must come back as a dict with the same content.
    parsed, ok = extract_value_raw('{"goals": [1,2,3]}', *json_kind)
    assert ok
    assert isinstance(parsed, dict)
    assert parsed == {"goals": [1, 2, 3]}

    # A JSON array must come back as a list.
    parsed, ok = extract_value_raw('[1, 2, 3]', *json_kind)
    assert ok
    assert isinstance(parsed, list)

    # Text that is not JSON is reported as a failure with no value.
    parsed, ok = extract_value_raw('not json', *json_kind)
    assert not ok
    assert parsed is None


def test_value_raw_number():
    """Check numeric extraction: the number is returned without the trailing unit text."""
    # Each case: (raw text, requested output type, expected numeric value)
    numeric_cases = [
        ('85.8 kg', OutputType.NUMBER, 85.8),    # decimal followed by a unit
        ('42 Jahre', OutputType.INTEGER, 42),    # integer followed by a unit
        ('-12.5 kg', OutputType.NUMBER, -12.5),  # negative decimal
    ]
    for raw_text, output_type, expected in numeric_cases:
        extracted, ok = extract_value_raw(raw_text, output_type, PlaceholderType.ATOMIC)
        assert ok
        assert extracted == expected

    # Text that contains no number must be reported as a failure.
    _, ok = extract_value_raw('nicht verfügbar', OutputType.NUMBER, PlaceholderType.ATOMIC)
    assert not ok


def test_value_raw_markdown():
    """Check that Markdown text is returned as the same string."""
    markdown_text = '# Heading\nText'
    outcome = extract_value_raw(markdown_text, OutputType.MARKDOWN, PlaceholderType.RAW_DATA)
    extracted, ok = outcome
    assert ok
    assert extracted == markdown_text


def test_value_raw_date():
    """Check date extraction: an ISO date succeeds, a dotted day-first date does not."""
    iso_date = '2026-03-29'
    extracted, ok = extract_value_raw(iso_date, OutputType.DATE, PlaceholderType.ATOMIC)
    assert ok
    assert extracted == iso_date

    # A non-ISO date is flagged as unsuccessful; the returned value is not checked here.
    _, ok = extract_value_raw('29.03.2026', OutputType.DATE, PlaceholderType.ATOMIC)
    assert not ok  # format is not recognized


# ── Unit Inference Tests ──────────────────────────────────────────────────────

def test_unit_no_units_for_scores():
    """Check that score-style placeholders are inferred without a unit."""
    # Each case: (placeholder key, description)
    score_cases = (
        ('goal_progress_score', 'Progress score'),
        ('protein_adequacy_28d', 'Protein adequacy'),
    )
    for key, description in score_cases:
        inferred = infer_unit_strict(key, description, OutputType.INTEGER, PlaceholderType.ATOMIC)
        assert inferred is None
    assert unit is None


def test_unit_no_units_for_correlations():
    """Check that a correlation placeholder is inferred without a unit."""
    inferred = infer_unit_strict(
        'correlation_energy_weight',
        'Correlation',
        OutputType.JSON,
        PlaceholderType.INTERPRETED,
    )
    assert inferred is None


def test_unit_no_units_for_ratios():
    """Check that ratio and percentage placeholders are inferred without a unit."""
    # Each case: (placeholder key, description, output type)
    dimensionless_cases = (
        ('waist_hip_ratio', 'Waist-hip ratio', OutputType.NUMBER),
        ('quality_sessions_pct', 'Quality sessions percentage', OutputType.INTEGER),
    )
    for key, description, output_type in dimensionless_cases:
        inferred = infer_unit_strict(key, description, output_type, PlaceholderType.ATOMIC)
        assert inferred is None


def test_unit_correct_units_for_measurements():
    """Check that measurement placeholders are inferred with the expected unit."""
    # Each case: (placeholder key, description, output type, expected unit)
    measurement_cases = (
        ('weight_aktuell', 'Aktuelles Gewicht', OutputType.NUMBER, 'kg'),   # weight
        ('waist_28d_delta', 'Taillenumfang', OutputType.NUMBER, 'cm'),      # circumference
        ('vitals_avg_hr', 'Ruhepuls', OutputType.INTEGER, 'bpm'),           # heart rate
        ('vitals_avg_hrv', 'HRV', OutputType.NUMBER, 'ms'),                 # HRV
    )
    for key, description, output_type, expected_unit in measurement_cases:
        inferred = infer_unit_strict(key, description, output_type, PlaceholderType.ATOMIC)
        assert inferred == expected_unit


def test_unit_no_units_for_json():
    """Check that a JSON-typed placeholder is inferred without a unit."""
    inferred = infer_unit_strict(
        'active_goals_json',
        'Active goals',
        OutputType.JSON,
        PlaceholderType.RAW_DATA,
    )
    assert inferred is None


# ── Time Window Detection Tests ───────────────────────────────────────────────

def test_time_window_explicit_suffix():
    """Check that an explicit day token in the key yields a certain time window.

    Only the key is supplied; the three remaining arguments are empty strings,
    so the detected window can only come from the key itself.
    """
    # `7d` token inside the key
    tw, certain, _ = detect_time_window_precise('weight_7d_median', '', '', '')
    assert tw == TimeWindow.DAYS_7
    # Identity check rather than `== True`: the flag must be the boolean True.
    assert certain is True

    # `28d` token at the end of the key
    tw, certain, _ = detect_time_window_precise('protein_avg_28d', '', '', '')
    assert tw == TimeWindow.DAYS_28
    assert certain is True


def test_time_window_latest():
    """Check that a latest/current keyword yields a certain LATEST window."""
    tw, certain, _ = detect_time_window_precise('weight_aktuell', 'Aktuelles Gewicht', '', '')
    assert tw == TimeWindow.LATEST
    # Identity check rather than `== True`: the flag must be the boolean True.
    assert certain is True


def test_time_window_from_contract():
    """Check that the time window is taken from the semantic contract text.

    The key carries no window hint and the two middle arguments are empty, so
    the 7-day result has to come from the contract string.
    """
    contract = 'Berechnet aus weight_log über 7 Tage'
    tw, certain, _ = detect_time_window_precise('weight_avg', '', '', contract)
    assert tw == TimeWindow.DAYS_7
    # Identity check rather than `== True`: the flag must be the boolean True.
    assert certain is True


def test_time_window_legacy_mismatch():
    """Check that a description/contract window conflict is reported."""
    legacy_description = 'Durchschnitt 30 Tage'
    semantic_contract = 'Berechnet über 7 Tage'

    outcome = detect_time_window_precise('weight_avg', legacy_description, '', semantic_contract)
    window, _certain, mismatch_note = outcome

    # The contract's 7-day window wins over the description's 30 days.
    assert window == TimeWindow.DAYS_7
    assert mismatch_note is not None


def test_time_window_unknown():
    """Check that UNKNOWN is returned, marked uncertain, when no hint is available."""
    tw, certain, _ = detect_time_window_precise('some_metric', '', '', '')
    assert tw == TimeWindow.UNKNOWN
    # Identity check rather than `== False`: the flag must be the boolean False.
    assert certain is False


# ── Source Provenance Tests ───────────────────────────────────────────────────

def test_source_skip_safe_wrappers():
    """Check that the `_safe_int` wrapper resolves to no real source."""
    resolved = resolve_real_source('_safe_int')
    source_func, source_module, _tables, source_kind = resolved
    assert source_func is None
    assert source_module is None
    assert source_kind == "wrapper"


def test_source_real_data_layer():
    """Check that `get_latest_weight` resolves to its direct data-layer source."""
    resolved = resolve_real_source('get_latest_weight')
    source_func, source_module, source_tables, source_kind = resolved
    assert source_func == 'get_latest_weight_data'
    assert source_module == 'body_metrics'
    assert 'weight_log' in source_tables
    assert source_kind == 'direct'


def test_source_computed():
    """Check that `calculate_bmi` is a computed source over weight and profile tables."""
    _func, _module, source_tables, source_kind = resolve_real_source('calculate_bmi')
    for required_table in ('weight_log', 'profiles'):
        assert required_table in source_tables
    assert source_kind == 'computed'


def test_source_aggregated():
    """Check that `get_nutrition_avg` resolves to an aggregated nutrition source."""
    resolved = resolve_real_source('get_nutrition_avg')
    source_func, source_module, _tables, source_kind = resolved
    assert source_func == 'get_nutrition_average_data'
    assert source_module == 'nutrition_metrics'
    assert source_kind == 'aggregated'


# ── Quality Filter Policy Tests ───────────────────────────────────────────────

def test_quality_filter_for_activity():
    """Check the quality filter policy created for an activity placeholder.

    The policy must exist, be enabled, default to the "quality" filter level,
    exclude entries with null quality, and not include poor entries.
    """
    policy = create_activity_quality_policy('activity_summary')
    assert policy is not None
    # Identity checks rather than `== True` / `== False`: the flags must be real booleans.
    assert policy.enabled is True
    assert policy.default_filter_level == "quality"
    assert policy.null_quality_handling == "exclude"
    assert policy.includes_poor is False


def test_quality_filter_not_for_non_activity():
    """Check that non-activity placeholders get no quality filter policy."""
    for placeholder_key in ('weight_aktuell', 'protein_avg'):
        created = create_activity_quality_policy(placeholder_key)
        assert created is None


# ── Completeness Score Tests ──────────────────────────────────────────────────

def test_completeness_score_high():
    """Check that fully populated metadata scores at least 80."""
    # Source section with resolver, module and tables all filled in.
    source_info = {
        'resolver': 'get_latest_weight',
        'data_layer_module': 'body_metrics',
        'source_tables': ['weight_log'],
    }
    metadata_dict = {
        'category': 'Körper',
        'description': 'Aktuelles Gewicht in kg',
        'semantic_contract': 'Letzter verfügbarer Gewichtseintrag aus weight_log',
        'source': source_info,
        'type': 'atomic',
        'time_window': 'latest',
        'output_type': 'number',
        'format_hint': '85.8 kg',
        'quality_filter_policy': None,
        'confidence_logic': {'supported': True},
    }

    completeness = calculate_completeness_score(metadata_dict)
    assert completeness >= 80


def test_completeness_score_low():
    """Check that mostly empty or unknown metadata scores below 50."""
    # Every field is either empty, None, or an "unknown" marker.
    sparse_metadata = {
        'category': 'Unknown',
        'description': '',
        'semantic_contract': '',
        'source': {'resolver': 'unknown'},
        'type': 'legacy_unknown',
        'time_window': 'unknown',
        'output_type': 'unknown',
        'format_hint': None,
    }

    completeness = calculate_completeness_score(sparse_metadata)
    assert completeness < 50


# ── Integration Tests ─────────────────────────────────────────────────────────

def test_no_interpreted_without_provenance():
    """Interpreted type only for proven AI/prompt sources.

    Not implemented yet: the check needs the actual placeholder metadata.
    The test is skipped explicitly so that it is not reported as passing
    while it asserts nothing.
    """
    pytest.skip("integration test not implemented: needs actual placeholder metadata")


def test_legacy_compatibility_maintained():
    """Legacy export format still works.

    Not implemented yet: the check would exercise existing consumers of the
    legacy export. The test is skipped explicitly so that it is not reported
    as passing while it asserts nothing.
    """
    pytest.skip("integration test not implemented: legacy export consumers are not exercised")


# ── Run Tests ─────────────────────────────────────────────────────────────────

if __name__ == "__main__":
    # Propagate pytest's exit status so that running this file directly
    # exits non-zero when a test fails (the bare call discarded it).
    raise SystemExit(pytest.main([__file__, "-v"]))