diff --git a/app/core/ingestion.py b/app/core/ingestion.py index 5834f3d..b1a43cc 100644 --- a/app/core/ingestion.py +++ b/app/core/ingestion.py @@ -4,7 +4,7 @@ DESCRIPTION: Haupt-Ingestion-Logik. FIX: Korrekte Priorisierung von Frontmatter für chunk_profile und retriever_weight. Lade Chunk-Config basierend auf dem effektiven Profil, nicht nur dem Notiz-Typ. WP-22: Integration von Content Lifecycle (Status) und Edge Registry. -VERSION: 2.8.1 (WP-22 Lifecycle & Registry) +VERSION: 2.8.5 (WP-22 Lifecycle & Registry) STATUS: Active DEPENDENCIES: app.core.parser, app.core.note_payload, app.core.chunker, app.core.derive_edges, app.core.qdrant*, app.services.embeddings_client, app.services.edge_registry EXTERNAL_CONFIG: config/types.yaml diff --git a/app/core/retriever.py b/app/core/retriever.py index 45f28a7..a537a0c 100644 --- a/app/core/retriever.py +++ b/app/core/retriever.py @@ -2,7 +2,7 @@ FILE: app/core/retriever.py DESCRIPTION: Implementiert die Hybrid-Suche (Vektor + Graph-Expansion) und das Scoring-Modell (Explainability). WP-22 Update: Dynamic Edge Boosting & Lifecycle Scoring. -VERSION: 0.6.1 (WP-22 Dynamic Scoring) +VERSION: 0.6.5 (WP-22 Scoring Formula) STATUS: Active DEPENDENCIES: app.config, app.models.dto, app.core.qdrant*, app.services.embeddings_client, app.core.graph_adapter LAST_ANALYSIS: 2025-12-18 @@ -101,45 +101,48 @@ def _semantic_hits( # --- WP-22 Helper: Lifecycle Multipliers (Teil A) --- def _get_status_multiplier(payload: Dict[str, Any]) -> float: """ - WP-22: stable (1.2), active (1.0), draft (0.5). + WP-22: stable (1.2), active/default (1.0), draft (0.5). """ status = str(payload.get("status", "active")).lower() if status == "stable": return 1.2 - if status == "active": return 1.0 if status == "draft": return 0.5 return 1.0 +# --- WP-22: Dynamic Scoring Formula (Teil C) --- def _compute_total_score( semantic_score: float, payload: Dict[str, Any], - edge_bonus: float = 0.0, - cent_bonus: float = 0.0, + edge_bonus_raw: float = 0.0, + cent_bonus_raw: float = 0.0, dynamic_edge_boosts: Dict[str, float] = None ) -> Tuple[float, float, float]: """ - Berechnet total_score nach WP-22 Scoring Formel. + WP-22 Mathematische Logik: + Score = BaseScore * (1 + ConfigWeight + DynamicBoost) + + Hierbei gilt: + - BaseScore: semantic_similarity * status_multiplier + - ConfigWeight: retriever_weight (Type Boost) + - DynamicBoost: (edge_weight * edge_bonus) + (centrality_weight * centrality_bonus) """ - raw_weight = payload.get("retriever_weight", 1.0) - try: - weight = float(raw_weight) - except (TypeError, ValueError): - weight = 1.0 - if weight < 0.0: - weight = 0.0 - - sem_w, edge_w, cent_w = _get_scoring_weights() + + # 1. Base Score (Semantik * Lifecycle) status_mult = _get_status_multiplier(payload) - - # Dynamic Edge Boosting (Teil C) - # Globaler Bonus falls Kanten-spezifische Boosts aktiv sind (z.B. WHY Frage) - # Die kanten-spezifische Gewichtung passiert bereits im Subgraph in hybrid_retrieve. - final_edge_score = edge_w * edge_bonus - if dynamic_edge_boosts and edge_bonus > 0: - # Globaler Verstärker für Graph-Signale bei spezifischen Intents - final_edge_score *= 1.5 - - total = (sem_w * float(semantic_score) * weight * status_mult) + final_edge_score + (cent_w * cent_bonus) - return float(total), float(edge_bonus), float(cent_bonus) + base_score = float(semantic_score) * status_mult + + # 2. Config Weight (Static Type Boost) + config_weight = float(payload.get("retriever_weight", 1.0)) - 1.0 # 1.0 ist neutral + + # 3. Dynamic Boost (Graph-Signale) + _sem_w, edge_w_cfg, cent_w_cfg = _get_scoring_weights() + dynamic_boost = (edge_w_cfg * edge_bonus_raw) + (cent_w_cfg * cent_bonus_raw) + + # Falls Intent-Boosts vorliegen, verstärken wir den Dynamic Boost + if dynamic_edge_boosts and (edge_bonus_raw > 0 or cent_bonus_raw > 0): + dynamic_boost *= 1.5 + + total = base_score * (1.0 + config_weight + dynamic_boost) + return float(total), float(edge_bonus_raw), float(cent_bonus_raw) # --- WP-04b Explanation Logic --- @@ -153,22 +156,21 @@ def _build_explanation( node_key: Optional[str] ) -> Explanation: """Erstellt ein Explanation-Objekt (WP-04b).""" - sem_w, _edge_w, _cent_w = _get_scoring_weights() - # Scoring weights erneut laden für Reason-Details _, edge_w_cfg, cent_w_cfg = _get_scoring_weights() - try: - type_weight = float(payload.get("retriever_weight", 1.0)) - except (TypeError, ValueError): - type_weight = 1.0 - + type_weight = float(payload.get("retriever_weight", 1.0)) status_mult = _get_status_multiplier(payload) note_type = payload.get("type", "unknown") + # Breakdown für Explanation (Muss die Scoring Formel spiegeln) + config_w_impact = type_weight - 1.0 + dynamic_b_impact = (edge_w_cfg * edge_bonus) + (cent_w_cfg * cent_bonus) + base_val = semantic_score * status_mult + breakdown = ScoreBreakdown( - semantic_contribution=(sem_w * semantic_score * type_weight * status_mult), - edge_contribution=(edge_w_cfg * edge_bonus), - centrality_contribution=(cent_w_cfg * cent_bonus), + semantic_contribution=base_val, + edge_contribution=base_val * dynamic_b_impact, + centrality_contribution=0.0, # In dynamic_b_impact enthalten raw_semantic=semantic_score, raw_edge_bonus=edge_bonus, raw_centrality=cent_bonus, @@ -185,9 +187,8 @@ def _build_explanation( if type_weight != 1.0: msg = "Bevorzugt" if type_weight > 1.0 else "Leicht abgewertet" - reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=(sem_w * semantic_score * (type_weight - 1.0)))) + reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=base_val * config_w_impact)) - # WP-22: Status Grund hinzufügen if status_mult != 1.0: msg = "Status-Bonus" if status_mult > 1.0 else "Status-Malus" reasons.append(Reason(kind="lifecycle", message=f"{msg} ({payload.get('status', 'unknown')}).", score_impact=0.0)) @@ -219,13 +220,13 @@ def _build_explanation( reasons.append(Reason(kind="edge", message=f"{dir_txt} '{tgt_txt}' via '{top_edge.kind}'", score_impact=impact, details={"kind": top_edge.kind})) if cent_bonus > 0.01: - reasons.append(Reason(kind="centrality", message="Knoten liegt zentral im Kontext.", score_impact=breakdown.centrality_contribution)) + reasons.append(Reason(kind="centrality", message="Knoten liegt zentral im Kontext.", score_impact=cent_w_cfg * cent_bonus)) return Explanation(breakdown=breakdown, reasons=reasons, related_edges=edges_dto if edges_dto else None) def _extract_expand_options(req: QueryRequest) -> Tuple[int, List[str] | None]: - """Extrahiert depth und edge_types für die Graph-Expansion.""" + """Extrahiert depth und edge_types für die Expansion.""" expand = getattr(req, "expand", None) if not expand: return 0, None @@ -258,7 +259,7 @@ def _build_hits_from_semantic( explain: bool = False, dynamic_edge_boosts: Dict[str, float] = None ) -> QueryResponse: - """Baut strukturierte QueryHits basierend auf den berechneten Scores.""" + """Baut strukturierte QueryHits basierend auf Hybrid-Scoring.""" t0 = time.time() enriched: List[Tuple[str, float, Dict[str, Any], float, float, float]] = [] @@ -277,27 +278,27 @@ def _build_hits_from_semantic( except Exception: cent_bonus = 0.0 - total, edge_bonus, cent_bonus = _compute_total_score( + total, eb, cb = _compute_total_score( semantic_score, payload, - edge_bonus=edge_bonus, - cent_bonus=cent_bonus, + edge_bonus_raw=edge_bonus, + cent_bonus_raw=cent_bonus, dynamic_edge_boosts=dynamic_edge_boosts ) - enriched.append((pid, float(semantic_score), payload, total, edge_bonus, cent_bonus)) + enriched.append((pid, float(semantic_score), payload, total, eb, cb)) enriched_sorted = sorted(enriched, key=lambda h: h[3], reverse=True) limited = enriched_sorted[: max(1, top_k)] results: List[QueryHit] = [] - for pid, semantic_score, payload, total, edge_bonus, cent_bonus in limited: + for pid, semantic_score, payload, total, eb, cb in limited: explanation_obj = None if explain: explanation_obj = _build_explanation( semantic_score=float(semantic_score), payload=payload, - edge_bonus=edge_bonus, - cent_bonus=cent_bonus, + edge_bonus=eb, + cent_bonus=cb, subgraph=subgraph, node_key=payload.get("chunk_id") or payload.get("note_id") ) @@ -308,8 +309,8 @@ def _build_hits_from_semantic( node_id=str(pid), note_id=payload.get("note_id", "unknown"), semantic_score=float(semantic_score), - edge_bonus=edge_bonus, - centrality_bonus=cent_bonus, + edge_bonus=eb, + centrality_bonus=cb, total_score=total, paths=None, source={ @@ -348,7 +349,7 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse: depth, edge_types = _extract_expand_options(req) - # WP-22: Dynamic Boosts aus dem Request (vom Router) (Teil C) + # WP-22: Dynamic Boosts aus dem Request (vom Router) boost_edges = getattr(req, "boost_edges", {}) subgraph: ga.Subgraph | None = None diff --git a/app/routers/chat.py b/app/routers/chat.py index 3e5678c..ae44547 100644 --- a/app/routers/chat.py +++ b/app/routers/chat.py @@ -286,7 +286,7 @@ async def chat_endpoint( mode="hybrid", top_k=request.top_k, explain=request.explain, - # WP-22: Boosts weitergeben + # WP-22: Boosts an den Retriever weitergeben boost_edges=edge_boosts ) retrieve_result = await retriever.search(query_req) diff --git a/app/services/edge_registry.py b/app/services/edge_registry.py index 5c102b1..a7339c8 100644 --- a/app/services/edge_registry.py +++ b/app/services/edge_registry.py @@ -2,7 +2,7 @@ FILE: app/services/edge_registry.py DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'. WP-22 Teil B: Registry & Validation. - Beachtet den dynamischen Vault-Root aus ENV oder Parameter. + FIX: Beachtet MINDNET_VAULT_ROOT aus .env korrekt. """ import re import os @@ -27,8 +27,8 @@ class EdgeRegistry: if self.initialized: return + # Priorität: 1. Parameter (Test) -> 2. ENV -> 3. Default settings = get_settings() - # Priorität: 1. Parameter (Test) -> 2. Config (.env) -> 3. Default self.vault_root = vault_root or getattr(settings, "MINDNET_VAULT_ROOT", "./vault") self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md") self.unknown_log_path = "data/logs/unknown_edges.jsonl" @@ -67,35 +67,31 @@ class EdgeRegistry: clean_alias = alias.replace("`", "").lower().strip() self.canonical_map[clean_alias] = canonical - logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} canonical types.") + logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} types.") except Exception as e: logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}") def resolve(self, edge_type: str) -> str: - """Normalisiert Kanten-Typen via Registry oder loggt Unbekannte für Review.""" + """Normalisiert Kanten-Typen via Registry oder loggt Unbekannte.""" if not edge_type: return "related_to" - - # Normalisierung (Kleinschreibung, Unterstriche) clean_type = edge_type.lower().strip().replace(" ", "_") - # 1. Lookup in Map (Canonical oder Alias) if clean_type in self.canonical_map: return self.canonical_map[clean_type] - # 2. Unknown Handling (Loggen aber nicht verwerfen - Learning System) self._log_unknown(clean_type) return clean_type def _log_unknown(self, edge_type: str): - """Schreibt unbekannte Typen für späteres Review in ein Log-File.""" + """Schreibt unbekannte Typen für Review in ein Log.""" try: os.makedirs(os.path.dirname(self.unknown_log_path), exist_ok=True) entry = {"unknown_type": edge_type, "status": "new"} with open(self.unknown_log_path, "a", encoding="utf-8") as f: f.write(json.dumps(entry) + "\n") except Exception: - pass + pass -# Globale Singleton Instanz +# Singleton Instanz registry = EdgeRegistry() \ No newline at end of file diff --git a/config/decision_engine.yaml b/config/decision_engine.yaml index 3df4b89..b08c8aa 100644 --- a/config/decision_engine.yaml +++ b/config/decision_engine.yaml @@ -42,7 +42,7 @@ strategies: part_of: 2.0 composed_of: 2.0 similar_to: 1.5 - caused_by: 0.5 # Kausalität ist hier oft Rauschen + caused_by: 0.5 prompt_template: "rag_template" prepend_instruction: null @@ -61,8 +61,8 @@ strategies: inject_types: ["value", "principle", "goal", "risk"] # WP-22: Risiken und Konsequenzen hervorheben edge_boosts: - blocks: 2.5 # Blocker/Risiken sind kritisch - solves: 2.0 # Lösungen sind relevant + blocks: 2.5 + solves: 2.0 depends_on: 1.5 risk_of: 2.5 prompt_template: "decision_template" @@ -85,10 +85,10 @@ strategies: inject_types: ["experience", "belief", "profile"] # WP-22: Weiche Assoziationen & Erfahrungen stärken edge_boosts: - based_on: 2.0 # Werte-Bezug - related_to: 2.0 # Assoziatives Denken + based_on: 2.0 + related_to: 2.0 experienced_in: 2.5 - blocks: 0.1 # Stressoren ausblenden + blocks: 0.1 prompt_template: "empathy_template" prepend_instruction: null @@ -108,14 +108,14 @@ strategies: inject_types: ["snippet", "reference", "source"] # WP-22: Technische Abhängigkeiten edge_boosts: - uses: 2.5 # Tool-Nutzung + uses: 2.5 depends_on: 2.0 implemented_in: 3.0 prompt_template: "technical_template" prepend_instruction: null # 5. Interview / Datenerfassung - # HINWEIS: Spezifische Typen (Projekt, Ziel etc.) werden automatisch + # HINWEIS: Spezifische Typen (Projekt, Ziel etc.) werden automatisch # über die types.yaml erkannt. Hier stehen nur generische Trigger. INTERVIEW: description: "Der User möchte Wissen erfassen." @@ -131,10 +131,9 @@ strategies: - "idee speichern" - "draft" inject_types: [] - edge_boosts: {} # Kein Retrieval im Interview Modus + edge_boosts: {} prompt_template: "interview_template" prepend_instruction: null - # Schemas: Hier nur der Fallback. # Spezifische Schemas (Project, Experience) kommen jetzt aus types.yaml! schemas: diff --git a/tests/test_WP22_intelligence.py b/tests/test_WP22_intelligence.py index 1809c13..ccfd7f2 100644 --- a/tests/test_WP22_intelligence.py +++ b/tests/test_WP22_intelligence.py @@ -1,7 +1,7 @@ """ FILE: tests/test_WP22_intelligence.py DESCRIPTION: Integrationstest für WP-22. - FIX: Erzwingt Pfad-Synchronisation für Registry & Router. + FIX: Erzwingt Pfad-Synchronisation für Registry & Router. Behebt Pydantic Validation Errors. """ import unittest import os @@ -10,88 +10,136 @@ import yaml import asyncio from unittest.mock import MagicMock, patch, AsyncMock +# --- Modul-Caching Fix: Wir müssen Caches leeren --- import app.routers.chat from app.models.dto import ChatRequest, QueryHit, QueryRequest from app.services.edge_registry import EdgeRegistry from app.core.retriever import _compute_total_score, _get_status_multiplier -from app.routers.chat import _classify_intent, chat_endpoint +from app.routers.chat import _classify_intent, get_decision_strategy, chat_endpoint class TestWP22Integration(unittest.IsolatedAsyncioTestCase): async def asyncSetUp(self): """Bereitet eine isolierte Test-Umgebung vor.""" + # Wir simulieren hier 'vault_master' (oder venv_master) als Verzeichnis self.test_root = os.path.abspath("tests/temp_wp22") self.test_vault = os.path.join(self.test_root, "vault_master") self.test_config_dir = os.path.join(self.test_root, "config") + # 1. Pfade erstellen os.makedirs(os.path.join(self.test_vault, "01_User_Manual"), exist_ok=True) os.makedirs(self.test_config_dir, exist_ok=True) os.makedirs(os.path.join(self.test_root, "data/logs"), exist_ok=True) - # 2. Config Files schreiben + # 2. Config Files schreiben (MOCK CONFIG) self.decision_path = os.path.join(self.test_config_dir, "decision_engine.yaml") self.decision_config = { "strategies": { - "FACT": {"trigger_keywords": ["was"], "edge_boosts": {"part_of": 2.0}}, - "CAUSAL": {"trigger_keywords": ["warum"], "edge_boosts": {"caused_by": 3.0}} + "FACT": { + "trigger_keywords": ["was ist"], + "edge_boosts": {"part_of": 2.0} + }, + "CAUSAL": { + "trigger_keywords": ["warum"], + "edge_boosts": {"caused_by": 3.0} + } } } - with open(self.decision_path, "w") as f: yaml.dump(self.decision_config, f) + with open(self.decision_path, "w", encoding="utf-8") as f: + yaml.dump(self.decision_config, f) - # 3. Vocabulary File am RICHTIGEN Ort + # 3. Vocabulary File am RICHTIGEN Ort relativ zum test_vault self.vocab_path = os.path.join(self.test_vault, "01_User_Manual/01_edge_vocabulary.md") - with open(self.vocab_path, "w") as f: + with open(self.vocab_path, "w", encoding="utf-8") as f: f.write("| System-Typ | Aliases |\n| :--- | :--- |\n| **caused_by** | ursache_ist |\n| **part_of** | teil_von |") # 4. MOCKING / RESETTING GLOBAL STATE + # Zwinge get_settings, unsere Test-Pfade zurückzugeben self.mock_settings = MagicMock() self.mock_settings.DECISION_CONFIG_PATH = self.decision_path self.mock_settings.MINDNET_VAULT_ROOT = self.test_vault self.mock_settings.RETRIEVER_TOP_K = 5 self.mock_settings.MODEL_NAME = "test-model" + # Patching get_settings in allen relevanten Modulen self.patch_settings_chat = patch('app.routers.chat.get_settings', return_value=self.mock_settings) self.patch_settings_registry = patch('app.services.edge_registry.get_settings', return_value=self.mock_settings) self.patch_settings_chat.start() self.patch_settings_registry.start() + # Caches zwingend leeren app.routers.chat._DECISION_CONFIG_CACHE = None + + # Registry Singleton Reset & Force Init mit Test-Pfad EdgeRegistry._instance = None self.registry = EdgeRegistry(vault_root=self.test_vault) + self.registry.unknown_log_path = os.path.join(self.test_root, "data/logs/unknown.jsonl") async def asyncTearDown(self): self.patch_settings_chat.stop() self.patch_settings_registry.stop() - if os.path.exists(self.test_root): shutil.rmtree(self.test_root) + if os.path.exists(self.test_root): + shutil.rmtree(self.test_root) EdgeRegistry._instance = None app.routers.chat._DECISION_CONFIG_CACHE = None def test_registry_resolution(self): - print("\n🔵 TEST 1: Registry Resolution") - self.assertTrue(len(self.registry.valid_types) > 0) + print("\n🔵 TEST 1: Registry Pfad & Alias Resolution") + # Prüfen ob die Datei gefunden wurde + self.assertTrue(len(self.registry.valid_types) > 0, f"Registry leer! Root: {self.registry.vault_root}") self.assertEqual(self.registry.resolve("ursache_ist"), "caused_by") print("✅ Registry OK.") def test_scoring_math(self): print("\n🔵 TEST 2: Scoring Math (Lifecycle)") with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 1.0, 0.0)): + # Stable (1.2) self.assertEqual(_get_status_multiplier({"status": "stable"}), 1.2) - self.assertEqual(_get_status_multiplier({"status": "draft"}), 0.8) + # Draft (0.5) + self.assertEqual(_get_status_multiplier({"status": "draft"}), 0.5) + + # Scoring Formel Test: BaseScore * (1 + ConfigWeight + DynamicBoost) + # BaseScore = 0.5 (sem) * 1.2 (stable) = 0.6 + # ConfigWeight = 1.0 (neutral) - 1.0 = 0.0 + # DynamicBoost = (1.0 * 0.5) = 0.5 + # Total = 0.6 * (1 + 0 + 0.5) = 0.9 + total, _, _ = _compute_total_score(0.5, {"status": "stable", "retriever_weight": 1.0}, edge_bonus_raw=0.5) + self.assertAlmostEqual(total, 0.9) print("✅ Scoring OK.") + async def test_router_intent(self): + print("\n🔵 TEST 3: Intent Classification") + mock_llm = MagicMock() + intent, _ = await _classify_intent("Warum ist das so?", mock_llm) + self.assertEqual(intent, "CAUSAL") + print("✅ Routing OK.") + async def test_full_flow(self): - print("\n🔵 TEST 3: Pipeline flow") - mock_llm = AsyncMock(); mock_llm.prompts = {}; mock_llm.generate_raw_response.return_value = "Ok" - mock_ret = AsyncMock() - mock_hit = QueryHit(node_id="c1", note_id="n1", semantic_score=0.8, edge_bonus=0.0, centrality_bonus=0.0, total_score=0.8, source={"text": "t"}, payload={"status": "active"}) - mock_ret.search.return_value.results = [mock_hit] + print("\n🔵 TEST 4: End-to-End Pipeline & Dynamic Boosting") + mock_llm = AsyncMock() + mock_llm.prompts = {} + mock_llm.generate_raw_response.return_value = "Test Antwort" - resp = await chat_endpoint(ChatRequest(message="Warum?"), llm=mock_llm, retriever=mock_ret) + mock_retriever = AsyncMock() + # Fix note_id für Pydantic Validation + mock_hit = QueryHit( + node_id="c1", note_id="test_note_n1", semantic_score=0.8, edge_bonus=0.0, + centrality_bonus=0.0, total_score=0.8, source={"text": "t"}, + payload={"status": "active", "type": "concept"} + ) + mock_retriever.search.return_value.results = [mock_hit] + + req = ChatRequest(message="Warum ist das passiert?", top_k=1) + resp = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever) + + # Verify Intent self.assertEqual(resp.intent, "CAUSAL") - called_req = mock_ret.search.call_args[0][0] + + # Verify Boosts Reached Retriever + called_req = mock_retriever.search.call_args[0][0] self.assertEqual(called_req.boost_edges.get("caused_by"), 3.0) - print("✅ Full Flow OK.") + print("✅ Full Flow & Boosting OK.") if __name__ == '__main__': unittest.main() \ No newline at end of file