test_WP22_intelligence aktualisiert roundtrip

This commit is contained in:
Lars 2025-12-18 12:43:19 +01:00
parent 7f7d8c87db
commit 342d3e5103

View File

@@ -1,109 +1,227 @@
"""
FILE: tests/test_WP22_integration.py
DESCRIPTION: Integrationstest für WP-22 (Graph Intelligence).
Prüft: Registry, Lifecycle Scoring, Router-Logik und Regression.
Mockt Datenbank und LLM, um Logikfehler isoliert zu finden.
"""
import unittest
import os
import shutil
import json
from unittest.mock import patch
import yaml
from unittest.mock import MagicMock, patch, AsyncMock
from datetime import datetime
# --- FIX: Import der KORREKTEN Funktion und Klassen ---
# --- Imports der App-Module ---
# Wir gehen davon aus, dass wir im Root-Verzeichnis sind.
from app.models.dto import ChatRequest, QueryRequest, QueryHit
from app.services.edge_registry import EdgeRegistry
from app.core.retriever import _compute_total_score, _get_status_multiplier
from app.routers.chat import _classify_intent, get_decision_strategy, chat_endpoint
# --- Test Suite ---
class TestWP22Integration(unittest.IsolatedAsyncioTestCase):
    """Integration tests for WP-22 (Graph Intelligence).

    Covers: edge registry/alias resolution, lifecycle scoring, the semantic
    router (intent -> strategy -> edge boosts) and a regression check for the
    standard query flow. Database and LLM are mocked so logic errors are
    isolated from infrastructure.

    NOTE: the async test methods require IsolatedAsyncioTestCase — on a plain
    TestCase they would return un-awaited coroutines and pass vacuously.
    """

    def setUp(self):
        """Bereitet eine isolierte Test-Umgebung vor."""
        self.test_dir = "tests/temp_integration"
        # Point all config lookups at the temp vault via environment patching.
        self.os_env_patch = patch.dict(os.environ, {
            "MINDNET_VAULT_ROOT": self.test_dir,
            "MINDNET_DECISION_CONFIG": os.path.join(self.test_dir, "config", "decision_engine.yaml"),
            "MINDNET_TYPES_FILE": os.path.join(self.test_dir, "config", "types.yaml"),
        })
        self.os_env_patch.start()

        # Create the directory skeleton the app expects.
        os.makedirs(os.path.join(self.test_dir, "config"), exist_ok=True)
        os.makedirs(os.path.join(self.test_dir, "01_User_Manual"), exist_ok=True)
        os.makedirs(os.path.join(self.test_dir, "data", "logs"), exist_ok=True)

        # 1. Config: decision_engine.yaml (strategies with edge boosts)
        self.decision_config = {
            "strategies": {
                "FACT": {
                    "trigger_keywords": ["was ist"],
                    "edge_boosts": {"part_of": 2.0},
                },
                "CAUSAL": {
                    "trigger_keywords": ["warum", "weshalb"],
                    "edge_boosts": {"caused_by": 3.0, "related_to": 0.5},
                },
            }
        }
        with open(os.environ["MINDNET_DECISION_CONFIG"], "w") as f:
            yaml.dump(self.decision_config, f)

        # 2. Config: edge vocabulary (consumed by the registry)
        with open(os.path.join(self.test_dir, "01_User_Manual", "01_edge_vocabulary.md"), "w") as f:
            f.write("| **caused_by** | ursache_ist, wegen |\n| **part_of** | teil_von |")

        # 3. Reset the registry singleton so it picks up the test vault.
        EdgeRegistry._instance = None
        self.registry = EdgeRegistry(vault_root=self.test_dir)

    def tearDown(self):
        """Stoppt die Patches und entfernt die Test-Umgebung."""
        self.os_env_patch.stop()
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        # Drop the singleton so other test modules get a fresh registry.
        EdgeRegistry._instance = None

    # ------------------------------------------------------------------------
    # TEST 1: Edge Registry & Validation (WP-22 B)
    # ------------------------------------------------------------------------
    def test_edge_registry_aliases(self):
        print("\n🔵 TEST 1: Edge Registry Resolution")
        if not self.registry.valid_types:
            self.fail(f"Registry empty! Root used: {self.registry.vault_root}")

        # Canonical names pass through, aliases resolve to their canonical.
        self.assertEqual(self.registry.resolve("caused_by"), "caused_by")
        self.assertEqual(self.registry.resolve("ursache_ist"), "caused_by")

        # Unknown edges are passed through unchanged ...
        unknown = self.registry.resolve("mystery_link")
        self.assertEqual(unknown, "mystery_link")

        # ... but must be logged for later vocabulary curation.
        if os.path.exists(self.registry.unknown_log_path):
            with open(self.registry.unknown_log_path, "r") as f:
                self.assertIn("mystery_link", f.read())
        else:
            self.fail("Logfile was not created.")
        print("✅ Registry funktioniert (Alias + Logging).")

    # ------------------------------------------------------------------------
    # TEST 2: Lifecycle Scoring (WP-22 A)
    # ------------------------------------------------------------------------
    def test_lifecycle_scoring_logic(self):
        print("\n🔵 TEST 2: Lifecycle Scoring (Draft vs. Stable)")
        # Mock weights: Sem=1.0, Edge=0.5, Cent=0.0
        with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 0.5, 0.0)):
            base_sem = 0.9

            # Case A: draft nodes get a malus.
            payload_draft = {"status": "draft", "retriever_weight": 1.0}
            mult_draft = _get_status_multiplier(payload_draft)
            self.assertEqual(mult_draft, 0.8, "Draft sollte 0.8 Multiplier haben.")
            score_draft, _, _ = _compute_total_score(base_sem, payload_draft)

            # Case B: stable nodes get a bonus.
            payload_stable = {"status": "stable", "retriever_weight": 1.0}
            mult_stable = _get_status_multiplier(payload_stable)
            self.assertEqual(mult_stable, 1.2, "Stable sollte 1.2 Multiplier haben.")
            score_stable, _, _ = _compute_total_score(base_sem, payload_stable)

            print(f"   Draft Score: {score_draft:.2f} | Stable Score: {score_stable:.2f}")
            self.assertGreater(score_stable, score_draft)
        print("✅ Lifecycle Scoring korrekt implementiert.")

    # ------------------------------------------------------------------------
    # TEST 3: Semantic Router & Boosting (WP-22 C)
    # ------------------------------------------------------------------------
    async def test_router_integration(self):
        print("\n🔵 TEST 3: Semantic Router Integration")
        # Mock LLM service (fallback path; bypassed here via keyword match).
        mock_llm = MagicMock()
        mock_llm.prompts = {}

        # --- Szenario A: Kausal-Frage ("Warum...") ---
        query_causal = "Warum ist das Projekt gescheitert?"

        # 1. Keyword-based intent detection.
        intent, source = await _classify_intent(query_causal, mock_llm)
        self.assertEqual(intent, "CAUSAL", "Sollte 'CAUSAL' Intent erkennen via Keywords.")

        # 2. Strategy lookup must surface the configured edge boosts.
        strategy = get_decision_strategy(intent)
        boosts = strategy.get("edge_boosts", {})
        self.assertEqual(boosts.get("caused_by"), 3.0, "Sollte 'caused_by' Boost von 3.0 laden.")
        print(f"   Intent: {intent} -> Boosts: {boosts}")
        print("✅ Router lädt Config korrekt.")

    # ------------------------------------------------------------------------
    # TEST 4: Full Pipeline (Chat -> Retriever)
    # ------------------------------------------------------------------------
    async def test_full_pipeline_flow(self):
        print("\n🔵 TEST 4: Full Chat Pipeline (Integration)")
        mock_llm = AsyncMock()
        mock_llm.prompts = {}
        mock_llm.generate_raw_response.return_value = "Das ist die Antwort."

        mock_retriever = AsyncMock()
        mock_hit = QueryHit(
            node_id="123", semantic_score=0.9, edge_bonus=0.5, centrality_bonus=0.0, total_score=1.0,
            source={"text": "Inhalt"}, payload={"type": "concept"}
        )
        mock_retriever.search.return_value.results = [mock_hit]

        req = ChatRequest(message="Warum ist das passiert?", top_k=3)

        # The router caches the decision config at module level; reset it so
        # the environment patched in setUp is actually honoured.
        import app.routers.chat
        app.routers.chat._DECISION_CONFIG_CACHE = None

        response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)

        # 1. Retriever must receive the strategy's edge boosts.
        called_query_req = mock_retriever.search.call_args[0][0]
        self.assertIsNotNone(called_query_req.boost_edges, "Retriever sollte boost_edges erhalten.")
        self.assertEqual(called_query_req.boost_edges.get("caused_by"), 3.0, "Boost 'caused_by' sollte 3.0 sein.")

        # 2. Intent must be propagated into the response.
        self.assertEqual(response.intent, "CAUSAL")
        print(f"   Retriever called with: {called_query_req.boost_edges}")
        print("✅ Pipeline reicht Boosts erfolgreich weiter.")

    # ------------------------------------------------------------------------
    # TEST 5: Regression Check (Fallback behavior)
    # ------------------------------------------------------------------------
    async def test_regression_standard_query(self):
        print("\n🔵 TEST 5: Regression (Standard Query)")
        # Query without a trigger keyword -> default/fallback intent (FACT).
        mock_llm = AsyncMock()
        mock_llm.prompts = {}
        mock_llm.generate_raw_response.return_value = "Antwort."

        mock_retriever = AsyncMock()
        mock_retriever.search.return_value.results = []

        # Cache reset (see test_full_pipeline_flow).
        import app.routers.chat
        app.routers.chat._DECISION_CONFIG_CACHE = None

        req = ChatRequest(message="Hallo Welt", top_k=3)
        response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)

        # System must not crash and must fall back to sane defaults:
        # FACT carries 'part_of': 2.0 in this fixture, but no 'caused_by'.
        called_query_req = mock_retriever.search.call_args[0][0]
        self.assertEqual(response.intent, "FACT")
        self.assertNotIn("caused_by", called_query_req.boost_edges or {})
        print("✅ Regression Test bestanden (Standard-Flow intakt).")
if __name__ == '__main__':
    # unittest.main() is sufficient: async test support comes from the test
    # case class itself (IsolatedAsyncioTestCase manages its own event loop).
    # The previous manual loop setup referenced `asyncio` without importing
    # it (NameError) and did not make unittest await coroutine tests anyway.
    unittest.main()