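bug fix: retune WP-22 retriever scoring (draft multiplier 0.5 -> 0.8, global edge boost 1.5 -> 1.2), seed graph expansion by chunk_id, document vault path resolution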

2025-12-18 13:21:53 +01:00 · 2025-12-18 13:21:53 +01:00 · 48729e6f5d
commit 48729e6f5d
parent 3eac646cb6
3 changed files with 46 additions and 44 deletions
--- a/app/core/ingestion.py
+++ b/app/core/ingestion.py
@@ -162,7 +162,7 @@ class IngestionService:
        # --- WP-22: Content Lifecycle Gate ---
        status = fm.get("status", "draft").lower().strip()
        
-        # Hard Skip für System-Dateien (Teil A)
+        # Hard Skip für System-Dateien
        if status in ["system", "template", "archive", "hidden"]:
            logger.info(f"Skipping file {file_path} (Status: {status})")
            return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"}
@@ -265,7 +265,7 @@ class IngestionService:
            except TypeError:
                raw_edges = build_edges_for_note(note_id, chunk_pls)
            
-            # --- WP-22: Edge Registry Validation (Teil B) ---
+            # --- WP-22: Edge Registry Validation ---
            edges = []
            if raw_edges:
                for edge in raw_edges:
--- a/app/core/retriever.py
+++ b/app/core/retriever.py
@@ -98,7 +98,7 @@ def _semantic_hits(
        results.append((str(pid), float(score), dict(payload or {})))
    return results

-# --- WP-22 Helper: Lifecycle Multipliers (Teil A) ---
+# --- WP-22 Helper: Lifecycle Multipliers ---
 def _get_status_multiplier(payload: Dict[str, Any]) -> float:
    """
    WP-22: Drafts werden bestraft, Stable Notes belohnt.
@@ -106,11 +106,10 @@ def _get_status_multiplier(payload: Dict[str, Any]) -> float:
    status = str(payload.get("status", "draft")).lower()
    if status == "stable": return 1.2
    if status == "active": return 1.0
-    if status == "draft":  return 0.5  # Malus für Entwürfe
+    if status == "draft":  return 0.8  # Malus für Entwürfe
    # Fallback für andere oder leere Status
    return 1.0

-# --- WP-22: Dynamic Scoring Formula (Teil C) ---
 def _compute_total_score(
    semantic_score: float,
    payload: Dict[str, Any],
@@ -119,8 +118,8 @@ def _compute_total_score(
    dynamic_edge_boosts: Dict[str, float] = None
 ) -> Tuple[float, float, float]:
    """
-    Berechnet total_score nach WP-22 Formel.
-    Score = (Sem * Type * Status) + (Weighted_Edge + Cent)
+    Berechnet total_score.
+    WP-22 Update: Integration von Status-Bonus und Dynamic Edge Boosts.
    """
    raw_weight = payload.get("retriever_weight", 1.0)
    try:
@@ -133,13 +132,13 @@ def _compute_total_score(
    sem_w, edge_w, cent_w = _get_scoring_weights()
    status_mult = _get_status_multiplier(payload)

-    # Dynamic Edge Boosting (Teil C)
-    # Wenn dynamische Boosts aktiv sind (durch den Router), verstärken wir den Graph-Bonus global.
-    # Der konkrete kanten-spezifische Boost passiert bereits im Subgraph (hybrid_retrieve).
+    # Dynamic Edge Boosting
+    # Wenn dynamische Boosts aktiv sind, erhöhen wir den Einfluss des Graphen
+    # Dies ist eine Vereinfachung, da der echte Boost im Subgraph passiert sein sollte.
    final_edge_score = edge_w * edge_bonus
    if dynamic_edge_boosts and edge_bonus > 0:
-         # Globaler Boost-Faktor falls Intention (z.B. WHY) vorliegt
-         final_edge_score *= 1.5
+         # Globaler Boost für Graph-Signale bei spezifischen Intents
+         final_edge_score *= 1.2

    total = (sem_w * float(semantic_score) * weight * status_mult) + final_edge_score + (cent_w * cent_bonus)
    return float(total), float(edge_bonus), float(cent_bonus)
@@ -155,8 +154,9 @@ def _build_explanation(
    subgraph: Optional[ga.Subgraph],
    node_key: Optional[str]
 ) -> Explanation:
-    """Erstellt ein Explanation-Objekt (WP-04b)."""
+    """Erstellt ein Explanation-Objekt."""
    sem_w, _edge_w, _cent_w = _get_scoring_weights()
+    # Scoring weights erneut laden für Reason-Details
    _, edge_w_cfg, cent_w_cfg = _get_scoring_weights()
    
    try:
@@ -167,7 +167,6 @@ def _build_explanation(
    status_mult = _get_status_multiplier(payload)
    note_type = payload.get("type", "unknown")

-    # Breakdown Berechnung (muss mit _compute_total_score korrelieren)
    breakdown = ScoreBreakdown(
        semantic_contribution=(sem_w * semantic_score * type_weight * status_mult),
        edge_contribution=(edge_w_cfg * edge_bonus),
@@ -181,7 +180,6 @@ def _build_explanation(
    reasons: List[Reason] = []
    edges_dto: List[EdgeDTO] = []

-    # Reason Generation Logik (WP-04b)
    if semantic_score > 0.85:
        reasons.append(Reason(kind="semantic", message="Sehr hohe textuelle Übereinstimmung.", score_impact=breakdown.semantic_contribution))
    elif semantic_score > 0.70:
@@ -191,13 +189,11 @@ def _build_explanation(
        msg = "Bevorzugt" if type_weight > 1.0 else "Leicht abgewertet"
        reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=(sem_w * semantic_score * (type_weight - 1.0))))

-    # NEU: WP-22 Status Reason
    if status_mult != 1.0:
        msg = "Status-Bonus" if status_mult > 1.0 else "Status-Malus"
        reasons.append(Reason(kind="lifecycle", message=f"{msg} ({payload.get('status')}).", score_impact=0.0))

    if subgraph and node_key and edge_bonus > 0:
-        # Extrahiere Top-Kanten für die Erklärung
        if hasattr(subgraph, "get_outgoing_edges"):
            outgoing = subgraph.get_outgoing_edges(node_key)
            for edge in outgoing:
@@ -230,7 +226,7 @@ def _build_explanation(


 def _extract_expand_options(req: QueryRequest) -> Tuple[int, List[str] | None]:
-    """Extrahiert depth und edge_types für Graph-Expansion."""
+    """Extrahiert depth und edge_types."""
    expand = getattr(req, "expand", None)
    if not expand:
        return 0, None
@@ -263,7 +259,7 @@ def _build_hits_from_semantic(
    explain: bool = False,
    dynamic_edge_boosts: Dict[str, float] = None
 ) -> QueryResponse:
-    """Baut strukturierte QueryHits basierend auf Scoring (WP-22 & WP-04b)."""
+    """Baut strukturierte QueryHits."""
    t0 = time.time()
    enriched: List[Tuple[str, float, Dict[str, Any], float, float, float]] = []

@@ -282,28 +278,27 @@ def _build_hits_from_semantic(
            except Exception:
                cent_bonus = 0.0

-        total, eb, cb = _compute_total_score(
+        total, edge_bonus, cent_bonus = _compute_total_score(
            semantic_score, 
            payload, 
            edge_bonus=edge_bonus, 
            cent_bonus=cent_bonus,
            dynamic_edge_boosts=dynamic_edge_boosts
        )
-        enriched.append((pid, float(semantic_score), payload, total, eb, cb))
+        enriched.append((pid, float(semantic_score), payload, total, edge_bonus, cent_bonus))

-    # Sort & Limit
    enriched_sorted = sorted(enriched, key=lambda h: h[3], reverse=True)
    limited = enriched_sorted[: max(1, top_k)]

    results: List[QueryHit] = []
-    for pid, semantic_score, payload, total, eb, cb in limited:
+    for pid, semantic_score, payload, total, edge_bonus, cent_bonus in limited:
        explanation_obj = None
        if explain:
            explanation_obj = _build_explanation(
                semantic_score=float(semantic_score),
                payload=payload,
-                edge_bonus=eb,
-                cent_bonus=cb,
+                edge_bonus=edge_bonus,
+                cent_bonus=cent_bonus,
                subgraph=subgraph,
                node_key=payload.get("chunk_id") or payload.get("note_id")
            )
@@ -312,10 +307,10 @@ def _build_hits_from_semantic(

        results.append(QueryHit(
            node_id=str(pid),
-            note_id=payload.get("note_id", "unknown"),
+            note_id=payload.get("note_id"),
            semantic_score=float(semantic_score),
-            edge_bonus=eb,
-            centrality_bonus=cb,
+            edge_bonus=edge_bonus,
+            centrality_bonus=cent_bonus,
            total_score=total,
            paths=None,
            source={
@@ -332,7 +327,7 @@ def _build_hits_from_semantic(


 def semantic_retrieve(req: QueryRequest) -> QueryResponse:
-    """Reiner semantischer Retriever (WP-02)."""
+    """Reiner semantischer Retriever."""
    client, prefix = _get_client_and_prefix()
    vector = _get_query_vector(req)
    top_k = req.top_k or get_settings().RETRIEVER_TOP_K
@@ -342,44 +337,44 @@ def semantic_retrieve(req: QueryRequest) -> QueryResponse:


 def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
-    """Hybrid-Retriever: semantische Suche + optionale Edge-Expansion (WP-04a)."""
+    """Hybrid-Retriever: semantische Suche + optionale Edge-Expansion."""
    client, prefix = _get_client_and_prefix()
-    
-    # 1. Semantische Suche
-    vector = list(req.query_vector) if req.query_vector else _get_query_vector(req)
+    if req.query_vector:
+        vector = list(req.query_vector)
+    else:
+        vector = _get_query_vector(req)
+
    top_k = req.top_k or get_settings().RETRIEVER_TOP_K
    hits = _semantic_hits(client, prefix, vector, top_k=top_k, filters=req.filters)

-    # 2. Graph Expansion & Custom Boosting (WP-22 Teil C)
    depth, edge_types = _extract_expand_options(req)
+    
+    # WP-22: Dynamic Boosts aus dem Request (vom Router)
    boost_edges = getattr(req, "boost_edges", {}) 

    subgraph: ga.Subgraph | None = None
    if depth and depth > 0:
        seed_ids: List[str] = []
        for _pid, _score, payload in hits:
-            key = payload.get("note_id")
+            key = payload.get("chunk_id") or payload.get("note_id")
            if key and key not in seed_ids:
                seed_ids.append(key)
-        
        if seed_ids:
            try:
-                # Subgraph laden
+                # Hier könnten wir boost_edges auch an expand übergeben, wenn ga.expand es unterstützt
                subgraph = ga.expand(client, prefix, seed_ids, depth=depth, edge_types=edge_types)
                
-                # --- WP-22: Kanten-Boosts im RAM-Graphen anwenden ---
-                # Dies manipuliert die Gewichte im Graphen, bevor der 'edge_bonus' berechnet wird.
+                # Manuelles Boosten der Kantengewichte im Graphen falls aktiv
                if boost_edges and subgraph and hasattr(subgraph, "graph"):
                     for u, v, data in subgraph.graph.edges(data=True):
                        k = data.get("kind")
                        if k in boost_edges:
-                            # Gewicht multiplizieren (z.B. caused_by * 3.0)
+                            # Gewicht erhöhen für diesen Query-Kontext
                            data["weight"] = data.get("weight", 1.0) * boost_edges[k]

            except Exception:
                subgraph = None

-    # 3. Scoring & Re-Ranking
    return _build_hits_from_semantic(
        hits, 
        top_k=top_k, 
@@ -391,6 +386,11 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse:


 class Retriever:
-    """Wrapper-Klasse für Suchoperationen."""
+    """
+    Wrapper-Klasse für WP-05 (Chat).
+    """
+    def __init__(self):
+        pass
+
    async def search(self, request: QueryRequest) -> QueryResponse:
        return hybrid_retrieve(request)
--- a/app/services/edge_registry.py
+++ b/app/services/edge_registry.py
@@ -2,7 +2,7 @@
 FILE: app/services/edge_registry.py
 DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'.
             WP-22 Teil B: Registry & Validation.
-             Beachtet den dynamischen Vault-Root aus ENV oder Parameter.
+             FIX: Dynamische Pfad-Auflösung basierend auf MINDNET_VAULT_ROOT.
 """
 import re
 import os
@@ -25,7 +25,7 @@ class EdgeRegistry:
        if self.initialized: 
            return
            
-        # Priorität: 1. Parameter -> 2. ENV -> 3. Default
+        # Priorität: 1. Parameter (Test) -> 2. ENV (dotenv) -> 3. Default
        self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault")
        self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md")
        self.unknown_log_path = "data/logs/unknown_edges.jsonl"
@@ -38,9 +38,11 @@ class EdgeRegistry:

    def _load_vocabulary(self):
        """Parst die Markdown-Tabelle im Vault."""
+        # Absoluten Pfad auflösen, um Verwechslungen im venv zu vermeiden
        full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path))
        
        if not os.path.exists(full_path):
+            # Debug-Info: Zeige wo genau gesucht wurde
            logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.")
            return