From 342d3e510315aed5f01ec537c019f3f3320d29c1 Mon Sep 17 00:00:00 2001 From: Lars Date: Thu, 18 Dec 2025 12:43:19 +0100 Subject: [PATCH] test_WP22_intelligence aktualisiert roundtrip --- tests/test_WP22_intelligence.py | 292 ++++++++++++++++++++++---------- 1 file changed, 205 insertions(+), 87 deletions(-) diff --git a/tests/test_WP22_intelligence.py b/tests/test_WP22_intelligence.py index 8df6907..52a29dd 100644 --- a/tests/test_WP22_intelligence.py +++ b/tests/test_WP22_intelligence.py @@ -1,109 +1,227 @@ +""" +FILE: tests/test_WP22_integration.py +DESCRIPTION: Integrationstest für WP-22 (Graph Intelligence). + Prüft: Registry, Lifecycle Scoring, Router-Logik und Regression. + Mockt Datenbank und LLM, um Logikfehler isoliert zu finden. +""" import unittest import os import shutil import json -from unittest.mock import patch +import yaml +from unittest.mock import MagicMock, patch, AsyncMock +from datetime import datetime -# --- FIX: Import der KORREKTEN Funktion und Klassen --- +# --- Imports der App-Module --- +# Wir gehen davon aus, dass wir im Root-Verzeichnis sind. +from app.models.dto import ChatRequest, QueryRequest, QueryHit from app.services.edge_registry import EdgeRegistry from app.core.retriever import _compute_total_score, _get_status_multiplier +from app.routers.chat import _classify_intent, get_decision_strategy, chat_endpoint -class TestWP22Intelligence(unittest.TestCase): +# --- Test Suite --- +class TestWP22Integration(unittest.TestCase): def setUp(self): - # 1. Test-Vault Struktur definieren - self.test_vault_root = os.path.abspath("tests/temp_vault") - self.user_manual_dir = os.path.join(self.test_vault_root, "01_User_Manual") - self.log_dir = os.path.abspath("tests/logs") - - # 2. Verzeichnisse erstellen - os.makedirs(self.user_manual_dir, exist_ok=True) - os.makedirs(self.log_dir, exist_ok=True) - - # 3. Dummy Vocabulary File am korrekten Ort erstellen - # Der Ort muss exakt '01_User_Manual/01_edge_vocabulary.md' relativ zum vault_root sein - self.vocab_file = os.path.join(self.user_manual_dir, "01_edge_vocabulary.md") - with open(self.vocab_file, "w", encoding="utf-8") as f: - f.write(""" -| **canonical** | Aliases | -| :--- | :--- | -| **caused_by** | ursache_ist, wegen | -| **next** | danach, folgt | - """) - - # 4. Registry Reset & Init mit explizitem Vault Root - # Wir setzen das Singleton zurück, um sicherzustellen, dass es unseren Pfad nutzt - EdgeRegistry._instance = None - # Hier übergeben wir den Test-Vault-Pfad! Das Skript ignoriert jetzt die .env für den Test. - self.registry = EdgeRegistry(vault_root=self.test_vault_root) - self.registry.unknown_log_path = os.path.join(self.log_dir, "unknown_edges.jsonl") + """Bereitet eine isolierte Test-Umgebung vor.""" + self.test_dir = "tests/temp_integration" + self.os_env_patch = patch.dict(os.environ, { + "MINDNET_VAULT_ROOT": self.test_dir, + "MINDNET_DECISION_CONFIG": os.path.join(self.test_dir, "config", "decision_engine.yaml"), + "MINDNET_TYPES_FILE": os.path.join(self.test_dir, "config", "types.yaml") + }) + self.os_env_patch.start() + + # Verzeichnisse erstellen + os.makedirs(os.path.join(self.test_dir, "config"), exist_ok=True) + os.makedirs(os.path.join(self.test_dir, "01_User_Manual"), exist_ok=True) + os.makedirs(os.path.join(self.test_dir, "data", "logs"), exist_ok=True) + + # 1. Config: decision_engine.yaml (mit Boosts) + self.decision_config = { + "strategies": { + "FACT": { + "trigger_keywords": ["was ist"], + "edge_boosts": {"part_of": 2.0} + }, + "CAUSAL": { + "trigger_keywords": ["warum", "weshalb"], + "edge_boosts": {"caused_by": 3.0, "related_to": 0.5} + } + } + } + with open(os.environ["MINDNET_DECISION_CONFIG"], "w") as f: + yaml.dump(self.decision_config, f) + + # 2. Config: Edge Vocabulary (für Registry) + with open(os.path.join(self.test_dir, "01_User_Manual", "01_edge_vocabulary.md"), "w") as f: + f.write("| **caused_by** | ursache_ist, wegen |\n| **part_of** | teil_von |") + + # 3. Registry Reset + EdgeRegistry._instance = None + self.registry = EdgeRegistry(vault_root=self.test_dir) def tearDown(self): - if os.path.exists(self.test_vault_root): - shutil.rmtree(self.test_vault_root) - if os.path.exists("tests/logs"): - shutil.rmtree("tests/logs") + self.os_env_patch.stop() + if os.path.exists(self.test_dir): + shutil.rmtree(self.test_dir) EdgeRegistry._instance = None - def test_registry_resolution(self): - print("\n--- Test A: Registry & Alias Resolution ---") + # ------------------------------------------------------------------------ + # TEST 1: Edge Registry & Validation (WP-22 B) + # ------------------------------------------------------------------------ + def test_edge_registry_aliases(self): + print("\n🔵 TEST 1: Edge Registry Resolution") + # Test: Alias Auflösung + resolved = self.registry.resolve("ursache_ist") + self.assertEqual(resolved, "caused_by", "Alias 'ursache_ist' sollte zu 'caused_by' werden.") - # Prüfen ob Pfad korrekt übernommen wurde - expected_path = os.path.join(self.test_vault_root, "01_User_Manual", "01_edge_vocabulary.md") - # Da wir abspath nutzen, vergleichen wir normalized paths - self.assertTrue(os.path.exists(expected_path), "Test fixture file was not created correctly") + # Test: Unknown Logging + unknown = self.registry.resolve("foobar_link") + self.assertEqual(unknown, "foobar_link", "Unbekannte Kanten sollen durchgereicht werden.") - if not self.registry.valid_types: - self.fail(f"Registry empty! Root used: {self.registry.vault_root}") + log_path = self.registry.unknown_log_path + self.assertTrue(os.path.exists(log_path), "Logfile für unbekannte Kanten fehlt.") + with open(log_path, "r") as f: + self.assertIn("foobar_link", f.read()) + print("✅ Registry funktioniert (Alias + Logging).") - self.assertEqual(self.registry.resolve("caused_by"), "caused_by") - self.assertEqual(self.registry.resolve("ursache_ist"), "caused_by") - - unknown = self.registry.resolve("mystery_link") - self.assertEqual(unknown, "mystery_link") - - # Prüfen ob Logging funktioniert - if os.path.exists(self.registry.unknown_log_path): - with open(self.registry.unknown_log_path, "r") as f: - self.assertIn("mystery_link", f.read()) - print("✅ Registry loaded from custom vault root & validated.") - else: - self.fail("Logfile was not created.") - - def test_lifecycle_scoring(self): - print("\n--- Test B: Lifecycle Scoring Math ---") - base_sem = 0.9 - payload_draft = {"status": "draft"} - payload_stable = {"status": "stable"} - + # ------------------------------------------------------------------------ + # TEST 2: Lifecycle Scoring (WP-22 A) + # ------------------------------------------------------------------------ + def test_lifecycle_scoring_logic(self): + print("\n🔵 TEST 2: Lifecycle Scoring (Draft vs. Stable)") + # Mock Weights: Sem=1.0, Edge=0.5, Cent=0.0 with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 0.5, 0.0)): - mult_draft = _get_status_multiplier(payload_draft) - mult_stable = _get_status_multiplier(payload_stable) - - score_draft = base_sem * mult_draft - score_stable = base_sem * mult_stable - - self.assertLess(score_draft, base_sem) - self.assertGreater(score_stable, base_sem) - print("✅ Lifecycle scoring math verified.") - - def test_dynamic_boosting(self): - print("\n--- Test C: Dynamic Edge Boosting ---") - semantic_score = 0.5 - raw_edge_bonus = 1.0 - payload = {"status": "active"} - - with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 1.0, 0.0)): - score_normal, _, _ = _compute_total_score( - semantic_score, payload, edge_bonus=raw_edge_bonus, dynamic_edge_boosts=None - ) - boost_map = {"caused_by": 2.0} - score_boosted, _, _ = _compute_total_score( - semantic_score, payload, edge_bonus=raw_edge_bonus, dynamic_edge_boosts=boost_map - ) + base_sem = 0.9 - self.assertGreater(score_boosted, score_normal) - print("✅ Dynamic Boosting logic verified.") + # Case A: Draft (Malus) + payload_draft = {"status": "draft", "retriever_weight": 1.0} + mult_draft = _get_status_multiplier(payload_draft) + self.assertEqual(mult_draft, 0.8, "Draft sollte 0.8 Multiplier haben.") + + score_draft, _, _ = _compute_total_score(base_sem, payload_draft) + + # Case B: Stable (Bonus) + payload_stable = {"status": "stable", "retriever_weight": 1.0} + mult_stable = _get_status_multiplier(payload_stable) + self.assertEqual(mult_stable, 1.2, "Stable sollte 1.2 Multiplier haben.") + + score_stable, _, _ = _compute_total_score(base_sem, payload_stable) + + print(f" Draft Score: {score_draft:.2f} | Stable Score: {score_stable:.2f}") + self.assertGreater(score_stable, score_draft) + print("✅ Lifecycle Scoring korrekt implementiert.") + + # ------------------------------------------------------------------------ + # TEST 3: Semantic Router & Boosting (WP-22 C) + # ------------------------------------------------------------------------ + async def test_router_integration(self): + print("\n🔵 TEST 3: Semantic Router Integration") + + # Mock LLM Service (für Fallback, wird hier aber durch Keywords umgangen) + mock_llm = MagicMock() + mock_llm.prompts = {} + + # --- Szenario A: Kausal-Frage ("Warum...") --- + query_causal = "Warum ist das Projekt gescheitert?" + + # 1. Intent Detection prüfen + intent, source = await _classify_intent(query_causal, mock_llm) + self.assertEqual(intent, "CAUSAL", "Sollte 'CAUSAL' Intent erkennen via Keywords.") + + # 2. Strategy Load prüfen + strategy = get_decision_strategy(intent) + boosts = strategy.get("edge_boosts", {}) + self.assertEqual(boosts.get("caused_by"), 3.0, "Sollte 'caused_by' Boost von 3.0 laden.") + + print(f" Intent: {intent} -> Boosts: {boosts}") + print("✅ Router lädt Config korrekt.") + + # ------------------------------------------------------------------------ + # TEST 4: Full Pipeline (Chat -> Retriever) + # ------------------------------------------------------------------------ + async def test_full_pipeline_flow(self): + print("\n🔵 TEST 4: Full Chat Pipeline (Integration)") + + # Mocks + mock_llm = AsyncMock() + mock_llm.prompts = {} + mock_llm.generate_raw_response.return_value = "Das ist die Antwort." + + mock_retriever = AsyncMock() + # Mock Search Result + mock_hit = QueryHit( + node_id="123", semantic_score=0.9, edge_bonus=0.5, centrality_bonus=0.0, total_score=1.0, + source={"text": "Inhalt"}, payload={"type": "concept"} + ) + mock_retriever.search.return_value.results = [mock_hit] + + # Request: "Warum..." + req = ChatRequest(message="Warum ist das passiert?", top_k=3) + + # EXECUTE Endpoint + # Wir müssen sicherstellen, dass _load_decision_config unsere Test-Config nutzt. + # Da wir os.environ gepatcht haben, sollte das klappen. + + # Wir müssen die Caches leeren, da Module-Level Variablen sonst alte Werte haben + import app.routers.chat + app.routers.chat._DECISION_CONFIG_CACHE = None + + response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever) + + # ASSERTIONS + + # 1. Wurde der Retriever mit den Boosts aufgerufen? + # Wir inspecten das Argument 'boost_edges' im call_args des Retrievers + called_query_req = mock_retriever.search.call_args[0][0] + + self.assertIsNotNone(called_query_req.boost_edges, "Retriever sollte boost_edges erhalten.") + self.assertEqual(called_query_req.boost_edges.get("caused_by"), 3.0, "Boost 'caused_by' sollte 3.0 sein.") + + # 2. Wurde der Intent korrekt durchgereicht? + self.assertEqual(response.intent, "CAUSAL") + + print(f" Retriever called with: {called_query_req.boost_edges}") + print("✅ Pipeline reicht Boosts erfolgreich weiter.") + + # ------------------------------------------------------------------------ + # TEST 5: Regression Check (Fallback behavior) + # ------------------------------------------------------------------------ + async def test_regression_standard_query(self): + print("\n🔵 TEST 5: Regression (Standard Query)") + + # Request ohne Keyword -> Sollte FACT (Default) sein + # Oder LLM Fallback (hier gemockt) + + mock_llm = AsyncMock() + mock_llm.prompts = {} + # Simuliere LLM sagt nichts spezifisches -> Default FACT + mock_llm.generate_raw_response.return_value = "Antwort." + + mock_retriever = AsyncMock() + mock_retriever.search.return_value.results = [] + + # Cache Reset + import app.routers.chat + app.routers.chat._DECISION_CONFIG_CACHE = None + + req = ChatRequest(message="Hallo Welt", top_k=3) + + response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever) + + # Prüfen ob System nicht crasht und vernünftige Defaults nutzt + called_query_req = mock_retriever.search.call_args[0][0] + + # FACT strategy hat in unserem Test Setup 'part_of': 2.0 + # Aber keine 'caused_by' boosts. + self.assertEqual(response.intent, "FACT") + self.assertNotIn("caused_by", called_query_req.boost_edges or {}) + + print("✅ Regression Test bestanden (Standard-Flow intakt).") if __name__ == '__main__': + # Async Support für Unittest + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) unittest.main() \ No newline at end of file