"""
FILE: tests/test_WP22_integration.py
DESCRIPTION: Integration test for WP-22 (Graph Intelligence).
             Covers: edge registry, lifecycle scoring, router logic and
             regression behaviour. The retriever and the LLM are mocked so
             that logic errors can be found in isolation.
"""
import unittest
import os
import shutil
import json
import yaml
from unittest.mock import MagicMock, patch, AsyncMock
from datetime import datetime

# --- App module imports ---
# These imports assume the tests are started from the repository root.
from app.models.dto import ChatRequest, QueryRequest, QueryHit
from app.services.edge_registry import EdgeRegistry
from app.core.retriever import _compute_total_score, _get_status_multiplier
from app.routers.chat import _classify_intent, get_decision_strategy, chat_endpoint
import app.routers.chat as chat_router


# --- Test Suite ---
class TestWP22Integration(unittest.IsolatedAsyncioTestCase):
    """Integration tests for the WP-22 registry, scoring and routing logic.

    The class derives from ``IsolatedAsyncioTestCase`` because several tests
    are coroutines. On a plain ``TestCase`` an ``async def`` test is called
    but never awaited, so its assertions would silently never execute.
    """

    def setUp(self):
        """Build an isolated vault/config tree and point the app at it."""
        self.test_dir = "tests/temp_integration"
        config_dir = os.path.join(self.test_dir, "config")

        # Redirect the environment variables used by this test to the
        # temporary tree; the patch is undone in tearDown.
        self.os_env_patch = patch.dict(os.environ, {
            "MINDNET_VAULT_ROOT": self.test_dir,
            "MINDNET_DECISION_CONFIG": os.path.join(config_dir, "decision_engine.yaml"),
            "MINDNET_TYPES_FILE": os.path.join(config_dir, "types.yaml"),
        })
        self.os_env_patch.start()

        # Create the directory layout.
        os.makedirs(config_dir, exist_ok=True)
        os.makedirs(os.path.join(self.test_dir, "01_User_Manual"), exist_ok=True)
        os.makedirs(os.path.join(self.test_dir, "data", "logs"), exist_ok=True)

        # 1. Config: decision_engine.yaml (including edge boosts).
        self.decision_config = {
            "strategies": {
                "FACT": {
                    "trigger_keywords": ["was ist"],
                    "edge_boosts": {"part_of": 2.0},
                },
                "CAUSAL": {
                    "trigger_keywords": ["warum", "weshalb"],
                    "edge_boosts": {"caused_by": 3.0, "related_to": 0.5},
                },
            }
        }
        with open(os.environ["MINDNET_DECISION_CONFIG"], "w", encoding="utf-8") as f:
            yaml.dump(self.decision_config, f)

        # 2. Config: edge vocabulary (consumed by the registry).
        vocabulary_path = os.path.join(
            self.test_dir, "01_User_Manual", "01_edge_vocabulary.md"
        )
        with open(vocabulary_path, "w", encoding="utf-8") as f:
            f.write("| **caused_by** | ursache_ist, wegen |\n| **part_of** | teil_von |")

        # 3. Reset the registry singleton so it is rebuilt from the test vault.
        EdgeRegistry._instance = None
        self.registry = EdgeRegistry(vault_root=self.test_dir)

        # 4. Drop the module-level decision config cache so every test reads
        #    the file written above instead of a previously cached config.
        chat_router._DECISION_CONFIG_CACHE = None

    def tearDown(self):
        """Restore the environment and remove everything setUp created."""
        self.os_env_patch.stop()
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        EdgeRegistry._instance = None
        # Do not leak the test configuration into other test modules.
        chat_router._DECISION_CONFIG_CACHE = None

    # ------------------------------------------------------------------------
    # TEST 1: Edge Registry & Validation (WP-22 B)
    # ------------------------------------------------------------------------
    def test_edge_registry_aliases(self):
        """Aliases resolve to their canonical edge; unknown edges are logged."""
        print("\n🔵 TEST 1: Edge Registry Resolution")

        # Alias resolution.
        resolved = self.registry.resolve("ursache_ist")
        self.assertEqual(resolved, "caused_by", "Alias 'ursache_ist' sollte zu 'caused_by' werden.")

        # Unknown edges are passed through unchanged and written to the log.
        unknown = self.registry.resolve("foobar_link")
        self.assertEqual(unknown, "foobar_link", "Unbekannte Kanten sollen durchgereicht werden.")

        log_path = self.registry.unknown_log_path
        self.assertTrue(os.path.exists(log_path), "Logfile für unbekannte Kanten fehlt.")
        with open(log_path, "r") as f:
            self.assertIn("foobar_link", f.read())
        print("✅ Registry funktioniert (Alias + Logging).")

    # ------------------------------------------------------------------------
    # TEST 2: Lifecycle Scoring (WP-22 A)
    # ------------------------------------------------------------------------
    def test_lifecycle_scoring_logic(self):
        """A stable note must score higher than a draft with equal inputs."""
        print("\n🔵 TEST 2: Lifecycle Scoring (Draft vs. Stable)")

        # Mocked weights: semantic=1.0, edge=0.5, centrality=0.0.
        with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 0.5, 0.0)):
            base_sem = 0.9

            # Case A: draft (penalty).
            payload_draft = {"status": "draft", "retriever_weight": 1.0}
            mult_draft = _get_status_multiplier(payload_draft)
            self.assertEqual(mult_draft, 0.8, "Draft sollte 0.8 Multiplier haben.")
            score_draft, _, _ = _compute_total_score(base_sem, payload_draft)

            # Case B: stable (bonus).
            payload_stable = {"status": "stable", "retriever_weight": 1.0}
            mult_stable = _get_status_multiplier(payload_stable)
            self.assertEqual(mult_stable, 1.2, "Stable sollte 1.2 Multiplier haben.")
            score_stable, _, _ = _compute_total_score(base_sem, payload_stable)

            print(f" Draft Score: {score_draft:.2f} | Stable Score: {score_stable:.2f}")
            self.assertGreater(score_stable, score_draft)
        print("✅ Lifecycle Scoring korrekt implementiert.")

    # ------------------------------------------------------------------------
    # TEST 3: Semantic Router & Boosting (WP-22 C)
    # ------------------------------------------------------------------------
    async def test_router_integration(self):
        """A "Warum..." query is classified as CAUSAL and loads its boosts."""
        print("\n🔵 TEST 3: Semantic Router Integration")

        # Mock LLM service (fallback only; the keyword match is expected to
        # decide the intent here).
        mock_llm = MagicMock()
        mock_llm.prompts = {}

        # --- Scenario A: causal question ("Warum...") ---
        query_causal = "Warum ist das Projekt gescheitert?"

        # 1. Check intent detection.
        intent, _source = await _classify_intent(query_causal, mock_llm)
        self.assertEqual(intent, "CAUSAL", "Sollte 'CAUSAL' Intent erkennen via Keywords.")

        # 2. Check that the matching strategy is loaded.
        strategy = get_decision_strategy(intent)
        boosts = strategy.get("edge_boosts", {})
        self.assertEqual(boosts.get("caused_by"), 3.0, "Sollte 'caused_by' Boost von 3.0 laden.")
        print(f" Intent: {intent} -> Boosts: {boosts}")
        print("✅ Router lädt Config korrekt.")

    # ------------------------------------------------------------------------
    # TEST 4: Full Pipeline (Chat -> Retriever)
    # ------------------------------------------------------------------------
    async def test_full_pipeline_flow(self):
        """The chat endpoint forwards the CAUSAL edge boosts to the retriever."""
        print("\n🔵 TEST 4: Full Chat Pipeline (Integration)")

        # Mocks.
        mock_llm = AsyncMock()
        mock_llm.prompts = {}
        mock_llm.generate_raw_response.return_value = "Das ist die Antwort."
        mock_retriever = AsyncMock()

        # Mocked search result.
        mock_hit = QueryHit(
            node_id="123",
            semantic_score=0.9,
            edge_bonus=0.5,
            centrality_bonus=0.0,
            total_score=1.0,
            source={"text": "Inhalt"},
            payload={"type": "concept"},
        )
        mock_retriever.search.return_value.results = [mock_hit]

        # Request: "Warum..."
        req = ChatRequest(message="Warum ist das passiert?", top_k=3)

        # Execute the endpoint. The test config is picked up because setUp
        # patched os.environ and cleared the module-level config cache.
        response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)

        # 1. Was the retriever called with the boosts? Inspect 'boost_edges'
        #    on the first positional argument of the search call.
        called_query_req = mock_retriever.search.call_args[0][0]
        self.assertIsNotNone(called_query_req.boost_edges, "Retriever sollte boost_edges erhalten.")
        self.assertEqual(
            called_query_req.boost_edges.get("caused_by"),
            3.0,
            "Boost 'caused_by' sollte 3.0 sein.",
        )

        # 2. Was the intent passed through correctly?
        self.assertEqual(response.intent, "CAUSAL")
        print(f" Retriever called with: {called_query_req.boost_edges}")
        print("✅ Pipeline reicht Boosts erfolgreich weiter.")

    # ------------------------------------------------------------------------
    # TEST 5: Regression Check (Fallback behavior)
    # ------------------------------------------------------------------------
    async def test_regression_standard_query(self):
        """A query without trigger keywords is handled as intent FACT."""
        print("\n🔵 TEST 5: Regression (Standard Query)")

        # A request without a keyword is expected to end up as FACT, either
        # directly or through the (mocked) LLM fallback.
        mock_llm = AsyncMock()
        mock_llm.prompts = {}
        # Simulate an LLM answer that names no specific intent.
        mock_llm.generate_raw_response.return_value = "Antwort."
        mock_retriever = AsyncMock()
        mock_retriever.search.return_value.results = []

        req = ChatRequest(message="Hallo Welt", top_k=3)
        response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)

        # Check that the system does not crash and uses sensible defaults.
        called_query_req = mock_retriever.search.call_args[0][0]

        # The FACT strategy in this test setup has 'part_of': 2.0 but no
        # 'caused_by' boost.
        self.assertEqual(response.intent, "FACT")
        self.assertNotIn("caused_by", called_query_req.boost_edges or {})
        print("✅ Regression Test bestanden (Standard-Flow intakt).")


if __name__ == '__main__':
    # IsolatedAsyncioTestCase runs each coroutine test on its own event loop,
    # so no manual loop setup is required here.
    unittest.main()