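bug fix: WP-22 retriever scoring (draft multiplier 0.5, intent edge boost 1.5) and lowercase edge-alias normalization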

2025-12-18 13:15:58 +01:00 · 2025-12-18 13:15:58 +01:00 · 3eac646cb6
commit 3eac646cb6
parent 9a18f3cc8b
4 changed files with 131 additions and 226 deletions
--- a/app/core/ingestion.py
+++ b/app/core/ingestion.py
@ -162,7 +162,7 @@ class IngestionService:
        # --- WP-22: Content Lifecycle Gate ---
        status = fm.get("status", "draft").lower().strip()
        
-        # Hard Skip für System-Dateien
+        # Hard Skip für System-Dateien (Teil A)
        if status in ["system", "template", "archive", "hidden"]:
            logger.info(f"Skipping file {file_path} (Status: {status})")
            return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"}
@ -265,7 +265,7 @@ class IngestionService:
            except TypeError:
                raw_edges = build_edges_for_note(note_id, chunk_pls)
            
-            # --- WP-22: Edge Registry Validation ---
+            # --- WP-22: Edge Registry Validation (Teil B) ---
            edges = []
            if raw_edges:
                for edge in raw_edges:
--- a/app/core/retriever.py
+++ b/app/core/retriever.py
@ -98,7 +98,7 @@ def _semantic_hits(
        results.append((str(pid), float(score), dict(payload or {})))
    return results

-# --- WP-22 Helper: Lifecycle Multipliers ---
+# --- WP-22 Helper: Lifecycle Multipliers (Teil A) ---
 def _get_status_multiplier(payload: Dict[str, Any]) -> float:
    """
    WP-22: Drafts werden bestraft, Stable Notes belohnt.
@ -106,10 +106,11 @@ def _get_status_multiplier(payload: Dict[str, Any]) -> float:
    status = str(payload.get("status", "draft")).lower()
    if status == "stable": return 1.2
    if status == "active": return 1.0
-    if status == "draft":  return 0.8  # Malus für Entwürfe
+    if status == "draft":  return 0.5  # Malus für Entwürfe
    # Fallback für andere oder leere Status
    return 1.0

+# --- WP-22: Dynamic Scoring Formula (Teil C) ---
 def _compute_total_score(
    semantic_score: float,
    payload: Dict[str, Any],
@ -118,8 +119,8 @@ def _compute_total_score(
    dynamic_edge_boosts: Dict[str, float] = None
 ) -> Tuple[float, float, float]:
    """
-    Berechnet total_score.
-    WP-22 Update: Integration von Status-Bonus und Dynamic Edge Boosts.
+    Berechnet total_score nach WP-22 Formel.
+    Score = (sem_w * Sem * Type * Status) + Weighted_Edge + (cent_w * Cent)
    """
    raw_weight = payload.get("retriever_weight", 1.0)
    try:
@ -132,13 +133,13 @@ def _compute_total_score(
    sem_w, edge_w, cent_w = _get_scoring_weights()
    status_mult = _get_status_multiplier(payload)

-    # Dynamic Edge Boosting
-    # Wenn dynamische Boosts aktiv sind, erhöhen wir den Einfluss des Graphen
-    # Dies ist eine Vereinfachung, da der echte Boost im Subgraph passiert sein sollte.
+    # Dynamic Edge Boosting (Teil C)
+    # Wenn dynamische Boosts aktiv sind (durch den Router), verstärken wir den Graph-Bonus global.
+    # Der konkrete kanten-spezifische Boost passiert bereits im Subgraph (hybrid_retrieve).
    final_edge_score = edge_w * edge_bonus
    if dynamic_edge_boosts and edge_bonus > 0:
-         # Globaler Boost für Graph-Signale bei spezifischen Intents
-         final_edge_score *= 1.2
+         # Globaler Boost-Faktor falls Intention (z.B. WHY) vorliegt
+         final_edge_score *= 1.5

    total = (sem_w * float(semantic_score) * weight * status_mult) + final_edge_score + (cent_w * cent_bonus)
    return float(total), float(edge_bonus), float(cent_bonus)
@ -154,9 +155,8 @@ def _build_explanation(
    subgraph: Optional[ga.Subgraph],
    node_key: Optional[str]
 ) -> Explanation:
-    """Erstellt ein Explanation-Objekt."""
+    """Erstellt ein Explanation-Objekt (WP-04b)."""
    sem_w, _edge_w, _cent_w = _get_scoring_weights()
-    # Scoring weights erneut laden für Reason-Details
    _, edge_w_cfg, cent_w_cfg = _get_scoring_weights()
    
    try:
@ -167,6 +167,7 @@ def _build_explanation(
    status_mult = _get_status_multiplier(payload)
    note_type = payload.get("type", "unknown")

+    # Breakdown Berechnung (muss mit _compute_total_score korrelieren)
    breakdown = ScoreBreakdown(
        semantic_contribution=(sem_w * semantic_score * type_weight * status_mult),
        edge_contribution=(edge_w_cfg * edge_bonus),
@ -180,6 +181,7 @@ def _build_explanation(
    reasons: List[Reason] = []
    edges_dto: List[EdgeDTO] = []

+    # Reason Generation Logik (WP-04b)
    if semantic_score > 0.85:
        reasons.append(Reason(kind="semantic", message="Sehr hohe textuelle Übereinstimmung.", score_impact=breakdown.semantic_contribution))
    elif semantic_score > 0.70:
@ -189,11 +191,13 @@ def _build_explanation(
        msg = "Bevorzugt" if type_weight > 1.0 else "Leicht abgewertet"
        reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=(sem_w * semantic_score * (type_weight - 1.0))))

+    # NEU: WP-22 Status Reason
    if status_mult != 1.0:
        msg = "Status-Bonus" if status_mult > 1.0 else "Status-Malus"
        reasons.append(Reason(kind="lifecycle", message=f"{msg} ({payload.get('status')}).", score_impact=0.0))

    if subgraph and node_key and edge_bonus > 0:
+        # Extrahiere Top-Kanten für die Erklärung
        if hasattr(subgraph, "get_outgoing_edges"):
            outgoing = subgraph.get_outgoing_edges(node_key)
            for edge in outgoing:
@ -226,7 +230,7 @@ def _build_explanation(


 def _extract_expand_options(req: QueryRequest) -> Tuple[int, List[str] | None]:
-    """Extrahiert depth und edge_types."""
+    """Extrahiert depth und edge_types für Graph-Expansion."""
    expand = getattr(req, "expand", None)
    if not expand:
        return 0, None
@ -259,7 +263,7 @@ def _build_hits_from_semantic(
    explain: bool = False,
    dynamic_edge_boosts: Dict[str, float] = None
 ) -> QueryResponse:
-    """Baut strukturierte QueryHits."""
+    """Baut strukturierte QueryHits basierend auf Scoring (WP-22 & WP-04b)."""
    t0 = time.time()
    enriched: List[Tuple[str, float, Dict[str, Any], float, float, float]] = []

@ -278,27 +282,28 @@ def _build_hits_from_semantic(
            except Exception:
                cent_bonus = 0.0

-        total, edge_bonus, cent_bonus = _compute_total_score(
+        total, eb, cb = _compute_total_score(
            semantic_score, 
            payload, 
            edge_bonus=edge_bonus, 
            cent_bonus=cent_bonus,
            dynamic_edge_boosts=dynamic_edge_boosts
        )
-        enriched.append((pid, float(semantic_score), payload, total, edge_bonus, cent_bonus))
+        enriched.append((pid, float(semantic_score), payload, total, eb, cb))

+    # Sort & Limit
    enriched_sorted = sorted(enriched, key=lambda h: h[3], reverse=True)
    limited = enriched_sorted[: max(1, top_k)]

    results: List[QueryHit] = []
-    for pid, semantic_score, payload, total, edge_bonus, cent_bonus in limited:
+    for pid, semantic_score, payload, total, eb, cb in limited:
        explanation_obj = None
        if explain:
            explanation_obj = _build_explanation(
                semantic_score=float(semantic_score),
                payload=payload,
-                edge_bonus=edge_bonus,
-                cent_bonus=cent_bonus,
+                edge_bonus=eb,
+                cent_bonus=cb,
                subgraph=subgraph,
                node_key=payload.get("chunk_id") or payload.get("note_id")
            )
@ -307,10 +312,10 @@ def _build_hits_from_semantic(

        results.append(QueryHit(
            node_id=str(pid),
-            note_id=payload.get("note_id"),
+            note_id=payload.get("note_id", "unknown"),
            semantic_score=float(semantic_score),
-            edge_bonus=edge_bonus,
-            centrality_bonus=cent_bonus,
+            edge_bonus=eb,
+            centrality_bonus=cb,
            total_score=total,
            paths=None,
            source={
@ -327,7 +332,7 @@ def _build_hits_from_semantic(


 def semantic_retrieve(req: QueryRequest) -> QueryResponse:
-    """Reiner semantischer Retriever."""
+    """Reiner semantischer Retriever (WP-02)."""
    client, prefix = _get_client_and_prefix()
    vector = _get_query_vector(req)
    top_k = req.top_k or get_settings().RETRIEVER_TOP_K
@ -337,44 +342,44 @@ def semantic_retrieve(req: QueryRequest) -> QueryResponse:


 def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
-    """Hybrid-Retriever: semantische Suche + optionale Edge-Expansion."""
+    """Hybrid-Retriever: semantische Suche + optionale Edge-Expansion (WP-04a)."""
    client, prefix = _get_client_and_prefix()
-    if req.query_vector:
-        vector = list(req.query_vector)
-    else:
-        vector = _get_query_vector(req)
-
+    
+    # 1. Semantische Suche
+    vector = list(req.query_vector) if req.query_vector else _get_query_vector(req)
    top_k = req.top_k or get_settings().RETRIEVER_TOP_K
    hits = _semantic_hits(client, prefix, vector, top_k=top_k, filters=req.filters)

+    # 2. Graph Expansion & Custom Boosting (WP-22 Teil C)
    depth, edge_types = _extract_expand_options(req)
-    
-    # WP-22: Dynamic Boosts aus dem Request (vom Router)
    boost_edges = getattr(req, "boost_edges", {}) 

    subgraph: ga.Subgraph | None = None
    if depth and depth > 0:
        seed_ids: List[str] = []
        for _pid, _score, payload in hits:
-            key = payload.get("chunk_id") or payload.get("note_id")
+            key = payload.get("note_id")
            if key and key not in seed_ids:
                seed_ids.append(key)
+        
        if seed_ids:
            try:
-                # Hier könnten wir boost_edges auch an expand übergeben, wenn ga.expand es unterstützt
+                # Subgraph laden
                subgraph = ga.expand(client, prefix, seed_ids, depth=depth, edge_types=edge_types)
                
-                # Manuelles Boosten der Kantengewichte im Graphen falls aktiv
+                # --- WP-22: Kanten-Boosts im RAM-Graphen anwenden ---
+                # Dies manipuliert die Gewichte im Graphen, bevor der 'edge_bonus' berechnet wird.
                if boost_edges and subgraph and hasattr(subgraph, "graph"):
                     for u, v, data in subgraph.graph.edges(data=True):
                        k = data.get("kind")
                        if k in boost_edges:
-                            # Gewicht erhöhen für diesen Query-Kontext
+                            # Gewicht multiplizieren (z.B. caused_by * 3.0)
                            data["weight"] = data.get("weight", 1.0) * boost_edges[k]

            except Exception:
                subgraph = None

+    # 3. Scoring & Re-Ranking
    return _build_hits_from_semantic(
        hits, 
        top_k=top_k, 
@ -386,11 +391,6 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse:


 class Retriever:
-    """
-    Wrapper-Klasse für WP-05 (Chat).
-    """
-    def __init__(self):
-        pass
-
+    """Wrapper-Klasse für Suchoperationen."""
    async def search(self, request: QueryRequest) -> QueryResponse:
        return hybrid_retrieve(request)
--- a/app/services/edge_registry.py
+++ b/app/services/edge_registry.py
@ -2,7 +2,7 @@
 FILE: app/services/edge_registry.py
 DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
             WP-22 Teil B: Registry & Validation.
-             FIX: Beachtet MINDNET_VAULT_ROOT aus .env korrekt.
+             Beachtet den dynamischen Vault-Root aus ENV oder Parameter.
 """
 import re
 import os
@ -25,15 +25,11 @@ class EdgeRegistry:
        if self.initialized: 
            return
            
-        # Priorität 1: Übergebener Parameter (z.B. für Tests)
-        # Priorität 2: Environment Variable (z.B. Production ./vault_master)
-        # Priorität 3: Default Fallback (./vault)
+        # Priorität: 1. Parameter -> 2. ENV -> 3. Default
        self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
-        
-        # Der relative Pfad ist laut Spezifikation fest definiert
        self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
-        
        self.unknown_log_path = "data/logs/unknown_edges.jsonl"
+        
        self.canonical_map: Dict[str, str] = {} 
        self.valid_types: Set[str] = set()
        
@ -42,15 +38,13 @@ class EdgeRegistry:

    def _load_vocabulary(self):
        """Parst die Markdown-Tabelle im Vault."""
-        # Absoluten Pfad auflösen, um Verwirrung mit cwd zu vermeiden
        full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
        
        if not os.path.exists(full_path):
-            # Wir loggen den vollen Pfad, damit Debugging einfacher ist
            logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.")
            return

-        # Regex: | **canonical** | alias, alias |
+        # Regex für Markdown Tabellen: | **canonical** | Aliases | ...
        pattern = re.compile(r"\|\s*\*\*([a-z_]+)\*\*\s*\|\s*([^|]+)\|")

        try:
@ -67,7 +61,7 @@ class EdgeRegistry:
                        if aliases_str and "Kein Alias" not in aliases_str:
                            aliases = [a.strip() for a in aliases_str.split(",") if a.strip()]
                            for alias in aliases:
-                                clean_alias = alias.replace("`", "")
+                                clean_alias = alias.replace("`", "").lower().strip()
                                self.canonical_map[clean_alias] = canonical
            
            logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} types.")
@ -76,6 +70,7 @@ class EdgeRegistry:
            logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}")

    def resolve(self, edge_type: str) -> str:
+        """Normalisiert Kanten-Typen via Registry oder loggt Unbekannte."""
        if not edge_type: return "related_to"
        clean_type = edge_type.lower().strip().replace(" ", "_")
        
@ -86,6 +81,7 @@ class EdgeRegistry:
        return clean_type 

    def _log_unknown(self, edge_type: str):
+        """Schreibt unbekannte Typen für Review in ein Log."""
        try:
            os.makedirs(os.path.dirname(self.unknown_log_path), exist_ok=True)
            entry = {"unknown_type": edge_type, "status": "new"}
@ -94,5 +90,5 @@ class EdgeRegistry:
        except Exception:
            pass

-# Default Instanz
+# Singleton Instanz
 registry = EdgeRegistry()
--- a/tests/test_WP22_intelligence.py
+++ b/tests/test_WP22_intelligence.py
@ -1,188 +1,97 @@
 """
-FILE: tests/test_WP22_integration.py
-DESCRIPTION: Integrationstest für WP-22 (Graph Intelligence).
-             FIXES: Pydantic Validation & Config Caching Issues.
+FILE: app/services/edge_registry.py
+DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
+             WP-22 Teil B: Registry & Validation.
+             FIX: Beachtet MINDNET_VAULT_ROOT aus .env korrekt.
 """
-import unittest
+import re
 import os
-import shutil
 import json
-import yaml
-import asyncio
-from unittest.mock import MagicMock, patch, AsyncMock
+import logging
+from typing import Dict, Optional, Set

-# Wir importieren das Modul direkt, um auf den Cache zuzugreifen
-import app.routers.chat 
+logger = logging.getLogger(__name__)

-# DTOs und Logik
-from app.models.dto import ChatRequest, QueryRequest, QueryHit
-from app.services.edge_registry import EdgeRegistry
-from app.core.retriever import _compute_total_score, _get_status_multiplier
-from app.routers.chat import _classify_intent, get_decision_strategy, chat_endpoint
+class EdgeRegistry:
+    _instance = None

-class TestWP22Integration(unittest.IsolatedAsyncioTestCase):
+    def __new__(cls, vault_root: Optional[str] = None):
+        if cls._instance is None:
+            cls._instance = super(EdgeRegistry, cls).__new__(cls)
+            cls._instance.initialized = False
+        return cls._instance

-    def setUp(self):
-        """Bereitet eine isolierte Test-Umgebung vor."""
-        self.test_dir = "tests/temp_integration"
-        
-        # 1. Environment Patching
-        self.os_env_patch = patch.dict(os.environ, {
-            "MINDNET_VAULT_ROOT": self.test_dir,
-            "MINDNET_DECISION_CONFIG": os.path.join(self.test_dir, "config", "decision_engine.yaml"),
-            "MINDNET_TYPES_FILE": os.path.join(self.test_dir, "config", "types.yaml")
-        })
-        self.os_env_patch.start()
-
-        # 2. Verzeichnisse erstellen
-        os.makedirs(os.path.join(self.test_dir, "config"), exist_ok=True)
-        os.makedirs(os.path.join(self.test_dir, "01_User_Manual"), exist_ok=True)
-        os.makedirs(os.path.join(self.test_dir, "data", "logs"), exist_ok=True)
-
-        # 3. Config: decision_engine.yaml schreiben (Test-Definition)
-        self.decision_config = {
-            "strategies": {
-                "FACT": {
-                    "trigger_keywords": ["was ist"],
-                    "edge_boosts": {"part_of": 2.0} # Kein 'caused_by' hier!
-                },
-                "CAUSAL": {
-                    "trigger_keywords": ["warum", "weshalb"],
-                    "edge_boosts": {"caused_by": 3.0, "related_to": 0.5}
-                }
-            }
-        }
-        with open(os.environ["MINDNET_DECISION_CONFIG"], "w") as f:
-            yaml.dump(self.decision_config, f)
-
-        # 4. Config: Edge Vocabulary schreiben
-        vocab_path = os.path.join(self.test_dir, "01_User_Manual", "01_edge_vocabulary.md")
-        with open(vocab_path, "w") as f:
-            f.write("| **caused_by** | ursache_ist, wegen |\n| **part_of** | teil_von |")
-
-        # 5. CACHE RESET (WICHTIG!)
-        # Damit der Router die oben geschriebene YAML auch wirklich liest:
-        app.routers.chat._DECISION_CONFIG_CACHE = None
-        EdgeRegistry._instance = None
-        
-        # Registry neu init
-        self.registry = EdgeRegistry(vault_root=self.test_dir)
-
-    def tearDown(self):
-        self.os_env_patch.stop()
-        if os.path.exists(self.test_dir):
-            shutil.rmtree(self.test_dir)
-        EdgeRegistry._instance = None
-        app.routers.chat._DECISION_CONFIG_CACHE = None
-
-    # ------------------------------------------------------------------------
-    # TEST 1: Edge Registry & Validation
-    # ------------------------------------------------------------------------
-    def test_edge_registry_aliases(self):
-        print("\n🔵 TEST 1: Edge Registry Resolution")
-        resolved = self.registry.resolve("ursache_ist")
-        self.assertEqual(resolved, "caused_by")
-        
-        unknown = self.registry.resolve("foobar_link")
-        self.assertEqual(unknown, "foobar_link")
-        
-        log_path = self.registry.unknown_log_path
-        self.assertTrue(os.path.exists(log_path))
-        print("✅ Registry funktioniert.")
-
-    # ------------------------------------------------------------------------
-    # TEST 2: Lifecycle Scoring
-    # ------------------------------------------------------------------------
-    def test_lifecycle_scoring_logic(self):
-        print("\n🔵 TEST 2: Lifecycle Scoring")
-        with patch("app.core.retriever._get_scoring_weights", return_value=(1.0, 0.5, 0.0)):
-            base_sem = 0.9
+    def __init__(self, vault_root: Optional[str] = None):
+        if self.initialized: 
+            return
            
-            payload_draft = {"status": "draft", "retriever_weight": 1.0}
-            mult_draft = _get_status_multiplier(payload_draft)
-            self.assertEqual(mult_draft, 0.8)
+        # Priorität 1: Übergebener Parameter (z.B. für Tests)
+        # Priorität 2: Environment Variable (z.B. Production ./vault_master)
+        # Priorität 3: Default Fallback (./vault)
+        self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
+        
+        # Der relative Pfad ist laut Spezifikation fest definiert
+        self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
+        
+        self.unknown_log_path = "data/logs/unknown_edges.jsonl"
+        self.canonical_map: Dict[str, str] = {} 
+        self.valid_types: Set[str] = set()
+        
+        self._load_vocabulary()
+        self.initialized = True
+
+    def _load_vocabulary(self):
+        """Parst die Markdown-Tabelle im Vault."""
+        # Absoluten Pfad auflösen, um Verwirrung mit cwd zu vermeiden
+        full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
+        
+        if not os.path.exists(full_path):
+            logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.")
+            return
+
+        # Regex: | **canonical** | alias, alias |
+        pattern = re.compile(r"\|\s*\*\*([a-z_]+)\*\*\s*\|\s*([^|]+)\|")
+
+        try:
+            with open(full_path, "r", encoding="utf-8") as f:
+                for line in f:
+                    match = pattern.search(line)
+                    if match:
+                        canonical = match.group(1).strip()
+                        aliases_str = match.group(2).strip()
+                        
+                        self.valid_types.add(canonical)
+                        self.canonical_map[canonical] = canonical
+                        
+                        if aliases_str and "Kein Alias" not in aliases_str:
+                            aliases = [a.strip() for a in aliases_str.split(",") if a.strip()]
+                            for alias in aliases:
+                                clean_alias = alias.replace("`", "")
+                                self.canonical_map[clean_alias] = canonical
            
-            payload_stable = {"status": "stable", "retriever_weight": 1.0}
-            mult_stable = _get_status_multiplier(payload_stable)
-            self.assertEqual(mult_stable, 1.2)
-        print("✅ Lifecycle Scoring korrekt.")
+            logger.info(f"EdgeRegistry loaded from {full_path}: {len(self.valid_types)} types.")

-    # ------------------------------------------------------------------------
-    # TEST 3: Semantic Router & Boosting
-    # ------------------------------------------------------------------------
-    async def test_router_integration(self):
-        print("\n🔵 TEST 3: Semantic Router Integration")
-        
-        mock_llm = MagicMock()
-        mock_llm.prompts = {}
-        
-        # Da der Cache im setUp gelöscht wurde, sollte er jetzt CAUSAL finden
-        query_causal = "Warum ist das Projekt gescheitert?"
-        intent, source = await _classify_intent(query_causal, mock_llm)
-        
-        self.assertEqual(intent, "CAUSAL", f"Erwartete CAUSAL, bekam {intent} via {source}")
-        
-        strategy = get_decision_strategy(intent)
-        boosts = strategy.get("edge_boosts", {})
-        self.assertEqual(boosts.get("caused_by"), 3.0)
-        print("✅ Router lädt Config korrekt.")
+        except Exception as e:
+            logger.error(f"Failed to parse Edge Vocabulary at {full_path}: {e}")

-    # ------------------------------------------------------------------------
-    # TEST 4: Full Pipeline
-    # ------------------------------------------------------------------------
-    async def test_full_pipeline_flow(self):
-        print("\n🔵 TEST 4: Full Chat Pipeline")
+    def resolve(self, edge_type: str) -> str:
+        if not edge_type: return "related_to"
+        clean_type = edge_type.lower().strip().replace(" ", "_")
        
-        mock_llm = AsyncMock()
-        mock_llm.prompts = {}
-        mock_llm.generate_raw_response.return_value = "Antwort."
+        if clean_type in self.canonical_map:
+            return self.canonical_map[clean_type]
        
-        mock_retriever = AsyncMock()
-        # FIX: note_id hinzugefügt für Pydantic
-        mock_hit = QueryHit(
-            node_id="123", 
-            note_id="test_note_123", # <--- WICHTIG
-            semantic_score=0.9, 
-            edge_bonus=0.5, 
-            centrality_bonus=0.0, 
-            total_score=1.0,
-            source={"text": "Inhalt"}, 
-            payload={"type": "concept"}
-        )
-        mock_retriever.search.return_value.results = [mock_hit]
+        self._log_unknown(clean_type)
+        return clean_type 

-        req = ChatRequest(message="Warum ist das passiert?", top_k=3)
-        
-        response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)
-        
-        called_query_req = mock_retriever.search.call_args[0][0]
-        self.assertEqual(called_query_req.boost_edges.get("caused_by"), 3.0)
-        self.assertEqual(response.intent, "CAUSAL")
-        print("✅ Pipeline reicht Boosts weiter.")
+    def _log_unknown(self, edge_type: str):
+        try:
+            os.makedirs(os.path.dirname(self.unknown_log_path), exist_ok=True)
+            entry = {"unknown_type": edge_type, "status": "new"}
+            with open(self.unknown_log_path, "a", encoding="utf-8") as f:
+                f.write(json.dumps(entry) + "\n")
+        except Exception:
+            pass

-    # ------------------------------------------------------------------------
-    # TEST 5: Regression Check
-    # ------------------------------------------------------------------------
-    async def test_regression_standard_query(self):
-        print("\n🔵 TEST 5: Regression")
-        
-        mock_llm = AsyncMock()
-        mock_llm.prompts = {}
-        mock_llm.generate_raw_response.return_value = "Antwort."
-        
-        mock_retriever = AsyncMock()
-        mock_retriever.search.return_value.results = []
-        
-        req = ChatRequest(message="Was ist das?", top_k=3)
-        
-        response = await chat_endpoint(req, llm=mock_llm, retriever=mock_retriever)
-        
-        called_query_req = mock_retriever.search.call_args[0][0]
-        
-        # FACT strategy hat in unserem Test Setup NUR 'part_of', KEIN 'caused_by'
-        self.assertEqual(response.intent, "FACT")
-        self.assertNotIn("caused_by", called_query_req.boost_edges or {})
-        print("✅ Regression Test bestanden.")
-
-if __name__ == '__main__':
-    unittest.main()
+# Default Instanz
+registry = EdgeRegistry()