""" Unit Tests für normalization_engine.py (Phase 2) Run with: PYTHONPATH=./backend pytest tests/backend/test_phase2_normalization.py -v """ import pytest from workflow_models import SignalStatus from normalization_engine import ( normalize_decision_signal, apply_synonym_mapping, normalize_all_signals ) # ── normalize_decision_signal Tests ──────────────────────────────────────────── def test_exact_match(): """Test: Exakte Übereinstimmung mit Spektrum → valid""" signal = normalize_decision_signal( question_type="relevanz", raw_value="ja", answer_spectrum=["ja", "nein", "unklar"] ) assert signal.status == SignalStatus.VALID assert signal.normalized_value == "ja" assert signal.raw_value == "ja" def test_case_insensitive_uppercase(): """Test: Case-insensitive Matching (Großbuchstaben) → normalized""" signal = normalize_decision_signal( question_type="relevanz", raw_value="JA", answer_spectrum=["ja", "nein", "unklar"] ) assert signal.status == SignalStatus.NORMALIZED assert signal.normalized_value == "ja" assert signal.metadata["method"] == "case_insensitive" def test_case_insensitive_mixed(): """Test: Case-insensitive Matching (Mixed Case) → normalized""" signal = normalize_decision_signal( question_type="prioritaet", raw_value="Hoch", answer_spectrum=["hoch", "mittel", "niedrig"] ) assert signal.status == SignalStatus.NORMALIZED assert signal.normalized_value == "hoch" def test_synonym_mapping_simple(): """Test: Synonym-Mapping → normalized""" rules = {"synonyms": {"ja": ["yes", "Yes", "YES"]}} signal = normalize_decision_signal( question_type="relevanz", raw_value="yes", answer_spectrum=["ja", "nein"], normalization_rules=rules ) assert signal.status == SignalStatus.NORMALIZED assert signal.normalized_value == "ja" assert signal.metadata["method"] == "synonym" def test_synonym_mapping_case_insensitive(): """Test: Synonym-Mapping mit case-insensitive → normalized""" rules = {"synonyms": {"ja": ["yes"]}} signal = normalize_decision_signal( question_type="relevanz", raw_value="YES", answer_spectrum=["ja", "nein"], normalization_rules=rules ) assert signal.status == SignalStatus.NORMALIZED assert signal.normalized_value == "ja" def test_invalid_value(): """Test: Wert außerhalb des Spektrums → invalid""" signal = normalize_decision_signal( question_type="relevanz", raw_value="vielleicht", answer_spectrum=["ja", "nein", "unklar"] ) assert signal.status == SignalStatus.INVALID assert signal.normalized_value is None def test_whitespace_handling(): """Test: Whitespace wird getrimmt → normalized""" signal = normalize_decision_signal( question_type="relevanz", raw_value=" ja ", answer_spectrum=["ja", "nein"] ) assert signal.status == SignalStatus.NORMALIZED # Wegen strip() assert signal.normalized_value == "ja" def test_synonym_no_match(): """Test: Synonym-Rules vorhanden, aber kein Match → invalid""" rules = {"synonyms": {"ja": ["yes"], "nein": ["no"]}} signal = normalize_decision_signal( question_type="relevanz", raw_value="maybe", answer_spectrum=["ja", "nein"], normalization_rules=rules ) assert signal.status == SignalStatus.INVALID # ── apply_synonym_mapping Tests ──────────────────────────────────────────────── def test_apply_synonym_exact(): """Test: Exakte Synonym-Übereinstimmung""" synonyms = {"ja": ["yes", "Yes"], "nein": ["no", "No"]} result = apply_synonym_mapping("yes", synonyms) assert result == "ja" def test_apply_synonym_case_insensitive(): """Test: Case-insensitive Synonym-Matching""" synonyms = {"ja": ["yes"], "nein": ["no"]} result = apply_synonym_mapping("YES", synonyms) assert result == "ja" def test_apply_synonym_no_match(): """Test: Kein Synonym-Match → None""" synonyms = {"ja": ["yes"], "nein": ["no"]} result = apply_synonym_mapping("vielleicht", synonyms) assert result is None def test_apply_synonym_whitespace(): """Test: Synonym mit Whitespace""" synonyms = {"ja": ["yes"]} result = apply_synonym_mapping(" yes ", synonyms) assert result == "ja" # ── normalize_all_signals Tests ──────────────────────────────────────────────── def test_normalize_all_signals_basic(): """Test: Mehrere Signale normalisieren""" signals = { "relevanz": "ja", "prioritaet": "HOCH" } catalog = { "relevanz": {"answer_spectrum": ["ja", "nein"], "normalization_rules": None}, "prioritaet": {"answer_spectrum": ["hoch", "mittel", "niedrig"], "normalization_rules": None} } normalized = normalize_all_signals(signals, catalog) assert len(normalized) == 2 assert normalized[0].question_type == "relevanz" assert normalized[0].status == SignalStatus.VALID assert normalized[1].question_type == "prioritaet" assert normalized[1].status == SignalStatus.NORMALIZED def test_normalize_all_signals_with_synonyms(): """Test: Normalisierung mit Synonymen""" signals = { "relevanz": "yes", "prioritaet": "high" } catalog = { "relevanz": { "answer_spectrum": ["ja", "nein"], "normalization_rules": {"synonyms": {"ja": ["yes"], "nein": ["no"]}} }, "prioritaet": { "answer_spectrum": ["hoch", "mittel", "niedrig"], "normalization_rules": {"synonyms": {"hoch": ["high"], "niedrig": ["low"]}} } } normalized = normalize_all_signals(signals, catalog) assert len(normalized) == 2 assert normalized[0].normalized_value == "ja" assert normalized[1].normalized_value == "hoch" def test_normalize_all_signals_not_in_catalog(): """Test: Question type nicht im Katalog → not_decidable""" signals = {"unknown_type": "value"} catalog = {"relevanz": {"answer_spectrum": ["ja", "nein"], "normalization_rules": None}} normalized = normalize_all_signals(signals, catalog) assert len(normalized) == 1 assert normalized[0].status == SignalStatus.NOT_DECIDABLE assert normalized[0].metadata["error"] == "not_in_catalog" def test_normalize_all_signals_mixed_validity(): """Test: Gemischte Gültigkeit (valid, normalized, invalid)""" signals = { "relevanz": "ja", # valid "prioritaet": "HOCH", # normalized (case) "selektion": "vielleicht" # invalid } catalog = { "relevanz": {"answer_spectrum": ["ja", "nein"], "normalization_rules": None}, "prioritaet": {"answer_spectrum": ["hoch", "mittel", "niedrig"], "normalization_rules": None}, "selektion": {"answer_spectrum": ["ja", "nein"], "normalization_rules": None} } normalized = normalize_all_signals(signals, catalog) assert len(normalized) == 3 assert normalized[0].status == SignalStatus.VALID assert normalized[1].status == SignalStatus.NORMALIZED assert normalized[2].status == SignalStatus.INVALID def test_normalize_all_signals_empty(): """Test: Leere Signal-Liste""" normalized = normalize_all_signals({}, {}) assert len(normalized) == 0 if __name__ == "__main__": pytest.main([__file__, "-v"])