mindnet/tests/test_WP22_intelligence.py
2025-12-18 12:48:57 +01:00

217 lines
9.2 KiB
Python

"""
FILE: tests/test_WP22_integration.py
DESCRIPTION: Integrationstest für WP-22 (Graph Intelligence).
Prüft: Registry, Lifecycle Scoring, Router-Logik und Regression.
Mockt Datenbank und LLM, um Logikfehler isoliert zu finden.
"""
import unittest
import os
import shutil
import json
import yaml
import asyncio # <--- FIX: Hier war der Fehler
from unittest.mock import MagicMock, patch, AsyncMock
from datetime import datetime
# --- Imports der App-Module ---
from app.models.dto import ChatRequest, QueryRequest, QueryHit
from app.services.edge_registry import EdgeRegistry
from app.core.retriever import _compute_total_score, _get_status_multiplier
from app.routers.chat import _classify_intent, get_decision_strategy, chat_endpoint
# --- Test Suite ---
class TestWP22Integration(unittest.IsolatedAsyncioTestCase):
    """Integration tests for WP-22: edge registry, lifecycle scoring, router.

    The suite derives from ``IsolatedAsyncioTestCase`` because several tests
    are coroutines. A plain ``unittest.TestCase`` calls ``async def`` tests
    without awaiting them, so their bodies (and assertions) would never run
    and the tests would pass vacuously.
    """

    def setUp(self):
        """Prepare an isolated test environment (temp vault, configs, registry)."""
        self.test_dir = "tests/temp_integration"
        config_dir = os.path.join(self.test_dir, "config")
        manual_dir = os.path.join(self.test_dir, "01_User_Manual")

        # Point the application at the temporary vault and config files.
        self.os_env_patch = patch.dict(os.environ, {
            "MINDNET_VAULT_ROOT": self.test_dir,
            "MINDNET_DECISION_CONFIG": os.path.join(config_dir, "decision_engine.yaml"),
            "MINDNET_TYPES_FILE": os.path.join(config_dir, "types.yaml"),
        })
        self.os_env_patch.start()

        # Create directories.
        os.makedirs(config_dir, exist_ok=True)
        os.makedirs(manual_dir, exist_ok=True)
        os.makedirs(os.path.join(self.test_dir, "data", "logs"), exist_ok=True)

        # 1. Config: decision_engine.yaml (with boosts).
        self.decision_config = {
            "strategies": {
                "FACT": {
                    "trigger_keywords": ["was ist"],
                    "edge_boosts": {"part_of": 2.0},
                },
                "CAUSAL": {
                    "trigger_keywords": ["warum", "weshalb"],
                    "edge_boosts": {"caused_by": 3.0, "related_to": 0.5},
                },
            }
        }
        with open(os.environ["MINDNET_DECISION_CONFIG"], "w", encoding="utf-8") as f:
            yaml.dump(self.decision_config, f)

        # 2. Config: edge vocabulary (consumed by the registry).
        vocabulary_path = os.path.join(manual_dir, "01_edge_vocabulary.md")
        with open(vocabulary_path, "w", encoding="utf-8") as f:
            f.write("| **caused_by** | ursache_ist, wegen |\n| **part_of** | teil_von |")

        # 3. Registry reset: clear the stored instance, then build a fresh one.
        EdgeRegistry._instance = None
        self.registry = EdgeRegistry(vault_root=self.test_dir)

        # 4. Every test must see the YAML written above, not a config cached
        #    by an earlier test or module.
        self._reset_decision_cache()

    def tearDown(self):
        """Restore the environment and remove everything setUp created."""
        self.os_env_patch.stop()
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        EdgeRegistry._instance = None
        # Do not leave a config loaded from the (now deleted) temp files behind.
        self._reset_decision_cache()

    @staticmethod
    def _reset_decision_cache():
        """Clear the chat router's module-level decision-config cache.

        NOTE(review): presumably the router reloads the config from
        MINDNET_DECISION_CONFIG when this attribute is None - confirm in
        app.routers.chat.
        """
        import app.routers.chat as chat_router
        chat_router._DECISION_CONFIG_CACHE = None

    # ------------------------------------------------------------------------
    # TEST 1: Edge Registry & Validation (WP-22 B)
    # ------------------------------------------------------------------------
    def test_edge_registry_aliases(self):
        """Aliases resolve to their canonical edge; unknown edges pass through and are logged."""
        print("\n🔵 TEST 1: Edge Registry Resolution")

        # Alias resolution.
        resolved = self.registry.resolve("ursache_ist")
        self.assertEqual(resolved, "caused_by", "Alias 'ursache_ist' sollte zu 'caused_by' werden.")

        # Unknown edges are returned unchanged and recorded in the log file.
        unknown = self.registry.resolve("foobar_link")
        self.assertEqual(unknown, "foobar_link", "Unbekannte Kanten sollen durchgereicht werden.")

        log_path = self.registry.unknown_log_path
        self.assertTrue(os.path.exists(log_path), "Logfile für unbekannte Kanten fehlt.")
        with open(log_path, "r") as f:
            self.assertIn("foobar_link", f.read())
        print("✅ Registry funktioniert (Alias + Logging).")

    # ------------------------------------------------------------------------
    # TEST 2: Lifecycle Scoring (WP-22 A)
    # ------------------------------------------------------------------------
    def test_lifecycle_scoring_logic(self):
        """A 'stable' payload must score higher than a 'draft' payload."""
        print("\n🔵 TEST 2: Lifecycle Scoring (Draft vs. Stable)")

        # Mock weights: Sem=1.0, Edge=0.5, Cent=0.0
        with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 0.5, 0.0)):
            base_sem = 0.9

            # Case A: draft (penalty).
            payload_draft = {"status": "draft", "retriever_weight": 1.0}
            mult_draft = _get_status_multiplier(payload_draft)
            # Floats: compare with tolerance instead of exact equality.
            self.assertAlmostEqual(mult_draft, 0.8, msg="Draft sollte 0.8 Multiplier haben.")
            score_draft, _, _ = _compute_total_score(base_sem, payload_draft)

            # Case B: stable (bonus).
            payload_stable = {"status": "stable", "retriever_weight": 1.0}
            mult_stable = _get_status_multiplier(payload_stable)
            self.assertAlmostEqual(mult_stable, 1.2, msg="Stable sollte 1.2 Multiplier haben.")
            score_stable, _, _ = _compute_total_score(base_sem, payload_stable)

            print(f" Draft Score: {score_draft:.2f} | Stable Score: {score_stable:.2f}")
            self.assertGreater(score_stable, score_draft)
        print("✅ Lifecycle Scoring korrekt implementiert.")

    # ------------------------------------------------------------------------
    # TEST 3: Semantic Router & Boosting (WP-22 C)
    # ------------------------------------------------------------------------
    async def test_router_integration(self):
        """A 'Warum ...' question is classified as CAUSAL and loads its edge boosts."""
        print("\n🔵 TEST 3: Semantic Router Integration")

        # Mock LLM service (fallback only; the keyword match is expected to win here).
        mock_llm = MagicMock()
        mock_llm.prompts = {}

        # --- Scenario A: causal question ("Warum...") ---
        query_causal = "Warum ist das Projekt gescheitert?"

        # 1. Intent detection.
        intent, _ = await _classify_intent(query_causal, mock_llm)
        self.assertEqual(intent, "CAUSAL", "Sollte 'CAUSAL' Intent erkennen via Keywords.")

        # 2. Strategy loading.
        strategy = get_decision_strategy(intent)
        boosts = strategy.get("edge_boosts", {})
        self.assertEqual(boosts.get("caused_by"), 3.0, "Sollte 'caused_by' Boost von 3.0 laden.")
        print(f" Intent: {intent} -> Boosts: {boosts}")
        print("✅ Router lädt Config korrekt.")

    # ------------------------------------------------------------------------
    # TEST 4: Full Pipeline (Chat -> Retriever)
    # ------------------------------------------------------------------------
    async def test_full_pipeline_flow(self):
        """The chat endpoint forwards the CAUSAL edge boosts to the retriever."""
        print("\n🔵 TEST 4: Full Chat Pipeline (Integration)")

        # Mocks
        mock_llm = AsyncMock()
        mock_llm.prompts = {}
        mock_llm.generate_raw_response.return_value = "Das ist die Antwort."
        mock_retriever = AsyncMock()

        # Mock search result
        mock_hit = QueryHit(
            node_id="123", semantic_score=0.9, edge_bonus=0.5, centrality_bonus=0.0, total_score=1.0,
            source={"text": "Inhalt"}, payload={"type": "concept"}
        )
        mock_retriever.search.return_value.results = [mock_hit]

        # Request: "Warum..."
        req = ChatRequest(message="Warum ist das passiert?", top_k=3)

        # Execute the endpoint (the decision-config cache was reset in setUp).
        response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)

        # 1. Was the retriever called with the boosts?
        # Fail with a clear message instead of a TypeError on call_args=None.
        mock_retriever.search.assert_called()
        called_query_req = mock_retriever.search.call_args[0][0]
        self.assertIsNotNone(called_query_req.boost_edges, "Retriever sollte boost_edges erhalten.")
        self.assertEqual(called_query_req.boost_edges.get("caused_by"), 3.0, "Boost 'caused_by' sollte 3.0 sein.")

        # 2. Was the intent passed through correctly?
        self.assertEqual(response.intent, "CAUSAL")
        print(f" Retriever called with: {called_query_req.boost_edges}")
        print("✅ Pipeline reicht Boosts erfolgreich weiter.")

    # ------------------------------------------------------------------------
    # TEST 5: Regression Check (Fallback behavior)
    # ------------------------------------------------------------------------
    async def test_regression_standard_query(self):
        """A query without trigger keywords yields FACT and no causal boosts."""
        print("\n🔵 TEST 5: Regression (Standard Query)")

        mock_llm = AsyncMock()
        mock_llm.prompts = {}
        mock_llm.generate_raw_response.return_value = "Antwort."
        mock_retriever = AsyncMock()
        mock_retriever.search.return_value.results = []

        req = ChatRequest(message="Hallo Welt", top_k=3)
        response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)

        mock_retriever.search.assert_called()
        called_query_req = mock_retriever.search.call_args[0][0]

        # The FACT strategy in this test setup has 'part_of': 2.0,
        # but no 'caused_by' boost.
        self.assertEqual(response.intent, "FACT")
        self.assertNotIn("caused_by", called_query_req.boost_edges or {})
        print("✅ Regression Test bestanden (Standard-Flow intakt).")
if __name__ == '__main__':
    # No event loop is created here. A loop installed in this guard would
    # exist only when the file is run as a script (not under pytest or
    # `python -m unittest`), so no test may depend on it. The loop previously
    # created here was also never closed.
    unittest.main()