mindnet/tests/test_WP22_intelligence.py

227 lines
9.8 KiB
Python

"""
FILE: tests/test_WP22_intelligence.py
DESCRIPTION: Integrationstest für WP-22 (Graph Intelligence).
Prüft: Registry, Lifecycle Scoring, Router-Logik und Regression.
Mockt Datenbank und LLM, um Logikfehler isoliert zu finden.
"""
import unittest
import os
import shutil
import json
import yaml
from unittest.mock import MagicMock, patch, AsyncMock
from datetime import datetime
# --- Imports der App-Module ---
# Wir gehen davon aus, dass wir im Root-Verzeichnis sind.
from app.models.dto import ChatRequest, QueryRequest, QueryHit
from app.services.edge_registry import EdgeRegistry
from app.core.retriever import _compute_total_score, _get_status_multiplier
from app.routers.chat import _classify_intent, get_decision_strategy, chat_endpoint
# --- Test Suite ---
class TestWP22Integration(unittest.IsolatedAsyncioTestCase):
    """Integration tests for WP-22 (graph intelligence).

    Exercises the edge registry, lifecycle scoring, the semantic router and a
    regression check of the default chat flow. The LLM and the retriever are
    replaced by mocks so that logic errors surface in isolation.

    The class derives from ``IsolatedAsyncioTestCase`` because several tests
    are coroutines. A plain ``unittest.TestCase`` would call them without
    awaiting the result, so their bodies would never execute and the tests
    would pass without checking anything.
    """

    @staticmethod
    def _reset_decision_cache():
        """Clear the chat router's module-level decision-config cache."""
        import app.routers.chat
        app.routers.chat._DECISION_CONFIG_CACHE = None

    def setUp(self):
        """Prepare an isolated test environment (env vars, configs, registry)."""
        self.test_dir = "tests/temp_integration"
        self.os_env_patch = patch.dict(os.environ, {
            "MINDNET_VAULT_ROOT": self.test_dir,
            "MINDNET_DECISION_CONFIG": os.path.join(self.test_dir, "config", "decision_engine.yaml"),
            "MINDNET_TYPES_FILE": os.path.join(self.test_dir, "config", "types.yaml")
        })
        self.os_env_patch.start()

        # Create the directory layout used by the fixtures below.
        os.makedirs(os.path.join(self.test_dir, "config"), exist_ok=True)
        os.makedirs(os.path.join(self.test_dir, "01_User_Manual"), exist_ok=True)
        os.makedirs(os.path.join(self.test_dir, "data", "logs"), exist_ok=True)

        # 1. Config: decision_engine.yaml (with edge boosts)
        self.decision_config = {
            "strategies": {
                "FACT": {
                    "trigger_keywords": ["was ist"],
                    "edge_boosts": {"part_of": 2.0}
                },
                "CAUSAL": {
                    "trigger_keywords": ["warum", "weshalb"],
                    "edge_boosts": {"caused_by": 3.0, "related_to": 0.5}
                }
            }
        }
        with open(os.environ["MINDNET_DECISION_CONFIG"], "w", encoding="utf-8") as f:
            yaml.dump(self.decision_config, f)

        # 2. Config: edge vocabulary (read by the registry)
        vocabulary_path = os.path.join(self.test_dir, "01_User_Manual", "01_edge_vocabulary.md")
        with open(vocabulary_path, "w", encoding="utf-8") as f:
            f.write("| **caused_by** | ursache_ist, wegen |\n| **part_of** | teil_von |")

        # 3. Reset the registry singleton so it is rebuilt from the test vault.
        EdgeRegistry._instance = None
        self.registry = EdgeRegistry(vault_root=self.test_dir)

        # 4. Drop any cached decision config so every test (not only the
        #    pipeline tests) reads the file written above.
        self._reset_decision_cache()

    def tearDown(self):
        """Undo the environment patch and remove all temporary artefacts."""
        self.os_env_patch.stop()
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        EdgeRegistry._instance = None
        # Do not leak the test configuration into other test modules.
        self._reset_decision_cache()

    # ------------------------------------------------------------------------
    # TEST 1: Edge Registry & Validation (WP-22 B)
    # ------------------------------------------------------------------------
    def test_edge_registry_aliases(self):
        """Aliases resolve to canonical names; unknown edges pass through and are logged."""
        print("\n🔵 TEST 1: Edge Registry Resolution")

        # Alias resolution
        resolved = self.registry.resolve("ursache_ist")
        self.assertEqual(resolved, "caused_by", "Alias 'ursache_ist' sollte zu 'caused_by' werden.")

        # Unknown edge types are returned unchanged and written to the log.
        unknown = self.registry.resolve("foobar_link")
        self.assertEqual(unknown, "foobar_link", "Unbekannte Kanten sollen durchgereicht werden.")
        log_path = self.registry.unknown_log_path
        self.assertTrue(os.path.exists(log_path), "Logfile für unbekannte Kanten fehlt.")
        with open(log_path, "r") as f:
            self.assertIn("foobar_link", f.read())
        print("✅ Registry funktioniert (Alias + Logging).")

    # ------------------------------------------------------------------------
    # TEST 2: Lifecycle Scoring (WP-22 A)
    # ------------------------------------------------------------------------
    def test_lifecycle_scoring_logic(self):
        """A 'stable' note must score higher than a 'draft' note with equal input."""
        print("\n🔵 TEST 2: Lifecycle Scoring (Draft vs. Stable)")

        # Mocked weights: semantic=1.0, edge=0.5, centrality=0.0
        with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 0.5, 0.0)):
            base_sem = 0.9

            # Case A: draft (penalty)
            payload_draft = {"status": "draft", "retriever_weight": 1.0}
            mult_draft = _get_status_multiplier(payload_draft)
            self.assertEqual(mult_draft, 0.8, "Draft sollte 0.8 Multiplier haben.")
            score_draft, _, _ = _compute_total_score(base_sem, payload_draft)

            # Case B: stable (bonus)
            payload_stable = {"status": "stable", "retriever_weight": 1.0}
            mult_stable = _get_status_multiplier(payload_stable)
            self.assertEqual(mult_stable, 1.2, "Stable sollte 1.2 Multiplier haben.")
            score_stable, _, _ = _compute_total_score(base_sem, payload_stable)

            print(f" Draft Score: {score_draft:.2f} | Stable Score: {score_stable:.2f}")
            self.assertGreater(score_stable, score_draft)
        print("✅ Lifecycle Scoring korrekt implementiert.")

    # ------------------------------------------------------------------------
    # TEST 3: Semantic Router & Boosting (WP-22 C)
    # ------------------------------------------------------------------------
    async def test_router_integration(self):
        """A 'Warum ...' question is classified as CAUSAL and loads its boosts."""
        print("\n🔵 TEST 3: Semantic Router Integration")

        # Mock LLM service (fallback only; the keyword match should short-circuit it).
        mock_llm = MagicMock()
        mock_llm.prompts = {}

        # --- Scenario A: causal question ("Warum...") ---
        query_causal = "Warum ist das Projekt gescheitert?"

        # 1. Intent detection
        intent, _source = await _classify_intent(query_causal, mock_llm)
        self.assertEqual(intent, "CAUSAL", "Sollte 'CAUSAL' Intent erkennen via Keywords.")

        # 2. Strategy loading
        strategy = get_decision_strategy(intent)
        boosts = strategy.get("edge_boosts", {})
        self.assertEqual(boosts.get("caused_by"), 3.0, "Sollte 'caused_by' Boost von 3.0 laden.")
        print(f" Intent: {intent} -> Boosts: {boosts}")
        print("✅ Router lädt Config korrekt.")

    # ------------------------------------------------------------------------
    # TEST 4: Full Pipeline (Chat -> Retriever)
    # ------------------------------------------------------------------------
    async def test_full_pipeline_flow(self):
        """The chat endpoint forwards the CAUSAL edge boosts to the retriever."""
        print("\n🔵 TEST 4: Full Chat Pipeline (Integration)")

        # Mocks
        mock_llm = AsyncMock()
        mock_llm.prompts = {}
        mock_llm.generate_raw_response.return_value = "Das ist die Antwort."
        mock_retriever = AsyncMock()

        # Mocked search result
        mock_hit = QueryHit(
            node_id="123", semantic_score=0.9, edge_bonus=0.5, centrality_bonus=0.0, total_score=1.0,
            source={"text": "Inhalt"}, payload={"type": "concept"}
        )
        mock_retriever.search.return_value.results = [mock_hit]

        # Request: "Warum..."
        req = ChatRequest(message="Warum ist das passiert?", top_k=3)

        # Execute the endpoint. The decision config is picked up from the
        # patched environment; the module-level cache was cleared in setUp so
        # no stale configuration is used.
        response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)

        # 1. Was the retriever called with the boosts? Inspect 'boost_edges'
        #    on the first positional argument of the search call.
        called_query_req = mock_retriever.search.call_args[0][0]
        self.assertIsNotNone(called_query_req.boost_edges, "Retriever sollte boost_edges erhalten.")
        self.assertEqual(called_query_req.boost_edges.get("caused_by"), 3.0, "Boost 'caused_by' sollte 3.0 sein.")

        # 2. Was the intent passed through correctly?
        self.assertEqual(response.intent, "CAUSAL")
        print(f" Retriever called with: {called_query_req.boost_edges}")
        print("✅ Pipeline reicht Boosts erfolgreich weiter.")

    # ------------------------------------------------------------------------
    # TEST 5: Regression Check (Fallback behavior)
    # ------------------------------------------------------------------------
    async def test_regression_standard_query(self):
        """A query without trigger keywords falls back to FACT without causal boosts."""
        print("\n🔵 TEST 5: Regression (Standard Query)")

        # No keyword in the request -> expected to end up as FACT (default),
        # possibly via the (mocked) LLM fallback.
        mock_llm = AsyncMock()
        mock_llm.prompts = {}
        # Simulate an LLM answer that names no specific intent -> default FACT.
        mock_llm.generate_raw_response.return_value = "Antwort."
        mock_retriever = AsyncMock()
        mock_retriever.search.return_value.results = []

        req = ChatRequest(message="Hallo Welt", top_k=3)
        response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)

        # The system must not crash and must use sensible defaults.
        called_query_req = mock_retriever.search.call_args[0][0]
        # In this test setup the FACT strategy carries 'part_of': 2.0 but no
        # 'caused_by' boost.
        self.assertEqual(response.intent, "FACT")
        self.assertNotIn("caused_by", called_query_req.boost_edges or {})
        print("✅ Regression Test bestanden (Standard-Flow intakt).")
if __name__ == '__main__':
    # unittest.main() discovers and runs the test cases in this module.
    # No event loop is created here: the unittest runner does not use a loop
    # installed via asyncio.set_event_loop(), and `asyncio` is not imported
    # in this file, so referencing it would raise NameError.
    unittest.main()