From 48729e6f5dc5e3fe8d31d19e9991f38a822398b8 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Thu, 18 Dec 2025 13:21:53 +0100
Subject: [PATCH] retriever: soften WP-22 draft malus/edge boost, seed expansion by chunk_id

---
 app/core/ingestion.py         |  4 +-
 app/core/retriever.py         | 80 +++++++++++++++++------------------
 app/services/edge_registry.py |  6 ++-
 3 files changed, 46 insertions(+), 44 deletions(-)

diff --git a/app/core/ingestion.py b/app/core/ingestion.py
index dd3ef13..6b3f232 100644
--- a/app/core/ingestion.py
+++ b/app/core/ingestion.py
@@ -162,7 +162,7 @@ class IngestionService:
         # --- WP-22: Content Lifecycle Gate ---
         status = fm.get("status", "draft").lower().strip()
         
-        # Hard Skip für System-Dateien (Teil A)
+        # Hard Skip für System-Dateien
         if status in ["system", "template", "archive", "hidden"]:
             logger.info(f"Skipping file {file_path} (Status: {status})")
             return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"}
@@ -265,7 +265,7 @@ class IngestionService:
             except TypeError:
                 raw_edges = build_edges_for_note(note_id, chunk_pls)
             
-            # --- WP-22: Edge Registry Validation (Teil B) ---
+            # --- WP-22: Edge Registry Validation ---
             edges = []
             if raw_edges:
                 for edge in raw_edges:
diff --git a/app/core/retriever.py b/app/core/retriever.py
index fe19b62..05fc309 100644
--- a/app/core/retriever.py
+++ b/app/core/retriever.py
@@ -98,7 +98,7 @@ def _semantic_hits(
         results.append((str(pid), float(score), dict(payload or {})))
     return results
 
-# --- WP-22 Helper: Lifecycle Multipliers (Teil A) ---
+# --- WP-22 Helper: Lifecycle Multipliers ---
 def _get_status_multiplier(payload: Dict[str, Any]) -> float:
     """
     WP-22: Drafts werden bestraft, Stable Notes belohnt.
@@ -106,11 +106,10 @@ def _get_status_multiplier(payload: Dict[str, Any]) -> float:
     status = str(payload.get("status", "draft")).lower()
     if status == "stable": return 1.2
     if status == "active": return 1.0
-    if status == "draft":  return 0.5  # Malus für Entwürfe
+    if status == "draft":  return 0.8  # Malus für Entwürfe
     # Fallback für andere oder leere Status
     return 1.0
 
-# --- WP-22: Dynamic Scoring Formula (Teil C) ---
 def _compute_total_score(
     semantic_score: float,
     payload: Dict[str, Any],
@@ -119,8 +118,8 @@ def _compute_total_score(
     dynamic_edge_boosts: Dict[str, float] = None
 ) -> Tuple[float, float, float]:
     """
-    Berechnet total_score nach WP-22 Formel.
-    Score = (Sem * Type * Status) + (Weighted_Edge + Cent)
+    Berechnet total_score.
+    WP-22 Update: Integration von Status-Bonus und Dynamic Edge Boosts.
     """
     raw_weight = payload.get("retriever_weight", 1.0)
     try:
@@ -133,13 +132,13 @@ def _compute_total_score(
     sem_w, edge_w, cent_w = _get_scoring_weights()
     status_mult = _get_status_multiplier(payload)
 
-    # Dynamic Edge Boosting (Teil C)
-    # Wenn dynamische Boosts aktiv sind (durch den Router), verstärken wir den Graph-Bonus global.
-    # Der konkrete kanten-spezifische Boost passiert bereits im Subgraph (hybrid_retrieve).
+    # Dynamic Edge Boosting
+    # Wenn dynamische Boosts aktiv sind, erhöhen wir den Einfluss des Graphen
+    # Dies ist eine Vereinfachung, da der echte Boost im Subgraph passiert sein sollte.
     final_edge_score = edge_w * edge_bonus
     if dynamic_edge_boosts and edge_bonus > 0:
-         # Globaler Boost-Faktor falls Intention (z.B. WHY) vorliegt
-         final_edge_score *= 1.5
+         # Globaler Boost für Graph-Signale bei spezifischen Intents
+         final_edge_score *= 1.2
 
     total = (sem_w * float(semantic_score) * weight * status_mult) + final_edge_score + (cent_w * cent_bonus)
     return float(total), float(edge_bonus), float(cent_bonus)
@@ -155,8 +154,9 @@ def _build_explanation(
     subgraph: Optional[ga.Subgraph],
     node_key: Optional[str]
 ) -> Explanation:
-    """Erstellt ein Explanation-Objekt (WP-04b)."""
+    """Erstellt ein Explanation-Objekt."""
     sem_w, _edge_w, _cent_w = _get_scoring_weights()
+    # Scoring weights erneut laden für Reason-Details
     _, edge_w_cfg, cent_w_cfg = _get_scoring_weights()
     
     try:
@@ -167,7 +167,6 @@ def _build_explanation(
     status_mult = _get_status_multiplier(payload)
     note_type = payload.get("type", "unknown")
 
-    # Breakdown Berechnung (muss mit _compute_total_score korrelieren)
     breakdown = ScoreBreakdown(
         semantic_contribution=(sem_w * semantic_score * type_weight * status_mult),
         edge_contribution=(edge_w_cfg * edge_bonus),
@@ -181,7 +180,6 @@ def _build_explanation(
     reasons: List[Reason] = []
     edges_dto: List[EdgeDTO] = []
 
-    # Reason Generation Logik (WP-04b)
     if semantic_score > 0.85:
         reasons.append(Reason(kind="semantic", message="Sehr hohe textuelle Übereinstimmung.", score_impact=breakdown.semantic_contribution))
     elif semantic_score > 0.70:
@@ -191,13 +189,11 @@ def _build_explanation(
         msg = "Bevorzugt" if type_weight > 1.0 else "Leicht abgewertet"
         reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=(sem_w * semantic_score * (type_weight - 1.0))))
 
-    # NEU: WP-22 Status Reason
     if status_mult != 1.0:
         msg = "Status-Bonus" if status_mult > 1.0 else "Status-Malus"
         reasons.append(Reason(kind="lifecycle", message=f"{msg} ({payload.get('status')}).", score_impact=0.0))
 
     if subgraph and node_key and edge_bonus > 0:
-        # Extrahiere Top-Kanten für die Erklärung
         if hasattr(subgraph, "get_outgoing_edges"):
             outgoing = subgraph.get_outgoing_edges(node_key)
             for edge in outgoing:
@@ -230,7 +226,7 @@ def _build_explanation(
 
 
 def _extract_expand_options(req: QueryRequest) -> Tuple[int, List[str] | None]:
-    """Extrahiert depth und edge_types für Graph-Expansion."""
+    """Extrahiert depth und edge_types."""
     expand = getattr(req, "expand", None)
     if not expand:
         return 0, None
@@ -263,7 +259,7 @@ def _build_hits_from_semantic(
     explain: bool = False,
     dynamic_edge_boosts: Dict[str, float] = None
 ) -> QueryResponse:
-    """Baut strukturierte QueryHits basierend auf Scoring (WP-22 & WP-04b)."""
+    """Baut strukturierte QueryHits."""
     t0 = time.time()
     enriched: List[Tuple[str, float, Dict[str, Any], float, float, float]] = []
 
@@ -282,28 +278,27 @@ def _build_hits_from_semantic(
             except Exception:
                 cent_bonus = 0.0
 
-        total, eb, cb = _compute_total_score(
+        total, edge_bonus, cent_bonus = _compute_total_score(
             semantic_score, 
             payload, 
             edge_bonus=edge_bonus, 
             cent_bonus=cent_bonus,
             dynamic_edge_boosts=dynamic_edge_boosts
         )
-        enriched.append((pid, float(semantic_score), payload, total, eb, cb))
+        enriched.append((pid, float(semantic_score), payload, total, edge_bonus, cent_bonus))
 
-    # Sort & Limit
     enriched_sorted = sorted(enriched, key=lambda h: h[3], reverse=True)
     limited = enriched_sorted[: max(1, top_k)]
 
     results: List[QueryHit] = []
-    for pid, semantic_score, payload, total, eb, cb in limited:
+    for pid, semantic_score, payload, total, edge_bonus, cent_bonus in limited:
         explanation_obj = None
         if explain:
             explanation_obj = _build_explanation(
                 semantic_score=float(semantic_score),
                 payload=payload,
-                edge_bonus=eb,
-                cent_bonus=cb,
+                edge_bonus=edge_bonus,
+                cent_bonus=cent_bonus,
                 subgraph=subgraph,
                 node_key=payload.get("chunk_id") or payload.get("note_id")
             )
@@ -312,10 +307,10 @@ def _build_hits_from_semantic(
 
         results.append(QueryHit(
             node_id=str(pid),
-            note_id=payload.get("note_id", "unknown"),
+            note_id=payload.get("note_id"),
             semantic_score=float(semantic_score),
-            edge_bonus=eb,
-            centrality_bonus=cb,
+            edge_bonus=edge_bonus,
+            centrality_bonus=cent_bonus,
             total_score=total,
             paths=None,
             source={
@@ -332,7 +327,7 @@ def _build_hits_from_semantic(
 
 
 def semantic_retrieve(req: QueryRequest) -> QueryResponse:
-    """Reiner semantischer Retriever (WP-02)."""
+    """Reiner semantischer Retriever."""
     client, prefix = _get_client_and_prefix()
     vector = _get_query_vector(req)
     top_k = req.top_k or get_settings().RETRIEVER_TOP_K
@@ -342,44 +337,44 @@ def semantic_retrieve(req: QueryRequest) -> QueryResponse:
 
 
 def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
-    """Hybrid-Retriever: semantische Suche + optionale Edge-Expansion (WP-04a)."""
+    """Hybrid-Retriever: semantische Suche + optionale Edge-Expansion."""
     client, prefix = _get_client_and_prefix()
-    
-    # 1. Semantische Suche
-    vector = list(req.query_vector) if req.query_vector else _get_query_vector(req)
+    if req.query_vector:
+        vector = list(req.query_vector)
+    else:
+        vector = _get_query_vector(req)
+
     top_k = req.top_k or get_settings().RETRIEVER_TOP_K
     hits = _semantic_hits(client, prefix, vector, top_k=top_k, filters=req.filters)
 
-    # 2. Graph Expansion & Custom Boosting (WP-22 Teil C)
     depth, edge_types = _extract_expand_options(req)
+    
+    # WP-22: Dynamic Boosts aus dem Request (vom Router)
     boost_edges = getattr(req, "boost_edges", {}) 
 
     subgraph: ga.Subgraph | None = None
     if depth and depth > 0:
         seed_ids: List[str] = []
         for _pid, _score, payload in hits:
-            key = payload.get("note_id")
+            key = payload.get("chunk_id") or payload.get("note_id")
             if key and key not in seed_ids:
                 seed_ids.append(key)
-        
         if seed_ids:
             try:
-                # Subgraph laden
+                # Hier könnten wir boost_edges auch an expand übergeben, wenn ga.expand es unterstützt
                 subgraph = ga.expand(client, prefix, seed_ids, depth=depth, edge_types=edge_types)
                 
-                # --- WP-22: Kanten-Boosts im RAM-Graphen anwenden ---
-                # Dies manipuliert die Gewichte im Graphen, bevor der 'edge_bonus' berechnet wird.
+                # Manuelles Boosten der Kantengewichte im Graphen falls aktiv
                 if boost_edges and subgraph and hasattr(subgraph, "graph"):
                      for u, v, data in subgraph.graph.edges(data=True):
                         k = data.get("kind")
                         if k in boost_edges:
-                            # Gewicht multiplizieren (z.B. caused_by * 3.0)
+                            # Gewicht erhöhen für diesen Query-Kontext
                             data["weight"] = data.get("weight", 1.0) * boost_edges[k]
 
             except Exception:
                 subgraph = None
 
-    # 3. Scoring & Re-Ranking
     return _build_hits_from_semantic(
         hits, 
         top_k=top_k, 
@@ -391,6 +386,11 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
 
 
 class Retriever:
-    """Wrapper-Klasse für Suchoperationen."""
+    """
+    Wrapper-Klasse für WP-05 (Chat).
+    """
+    def __init__(self):
+        pass
+
     async def search(self, request: QueryRequest) -> QueryResponse:
         return hybrid_retrieve(request)
\ No newline at end of file
diff --git a/app/services/edge_registry.py b/app/services/edge_registry.py
index b58d1b3..be63332 100644
--- a/app/services/edge_registry.py
+++ b/app/services/edge_registry.py
@@ -2,7 +2,7 @@
 FILE: app/services/edge_registry.py
 DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
              WP-22 Teil B: Registry & Validation.
-             Beachtet den dynamischen Vault-Root aus ENV oder Parameter.
+             FIX: Dynamische Pfad-Auflösung basierend auf MINDNET_VAULT_ROOT.
 """
 import re
 import os
@@ -25,7 +25,7 @@ class EdgeRegistry:
         if self.initialized: 
             return
             
-        # Priorität: 1. Parameter -> 2. ENV -> 3. Default
+        # Priorität: 1. Parameter (Test) -> 2. ENV (dotenv) -> 3. Default
         self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
         self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
         self.unknown_log_path = "data/logs/unknown_edges.jsonl"
@@ -38,9 +38,11 @@ class EdgeRegistry:
 
     def _load_vocabulary(self):
         """Parst die Markdown-Tabelle im Vault."""
+        # Absoluten Pfad auflösen, um Verwechslungen im venv zu vermeiden
         full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
         
         if not os.path.exists(full_path):
+            # Debug-Info: Zeige wo genau gesucht wurde
             logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.")
             return