From 48729e6f5dc5e3fe8d31d19e9991f38a822398b8 Mon Sep 17 00:00:00 2001 From: Lars Date: Thu, 18 Dec 2025 13:21:53 +0100 Subject: [PATCH] bug fix --- app/core/ingestion.py | 4 +- app/core/retriever.py | 80 +++++++++++++++++------------------ app/services/edge_registry.py | 6 ++- 3 files changed, 46 insertions(+), 44 deletions(-) diff --git a/app/core/ingestion.py b/app/core/ingestion.py index dd3ef13..6b3f232 100644 --- a/app/core/ingestion.py +++ b/app/core/ingestion.py @@ -162,7 +162,7 @@ class IngestionService: # --- WP-22: Content Lifecycle Gate --- status = fm.get("status", "draft").lower().strip() - # Hard Skip für System-Dateien (Teil A) + # Hard Skip für System-Dateien if status in ["system", "template", "archive", "hidden"]: logger.info(f"Skipping file {file_path} (Status: {status})") return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"} @@ -265,7 +265,7 @@ class IngestionService: except TypeError: raw_edges = build_edges_for_note(note_id, chunk_pls) - # --- WP-22: Edge Registry Validation (Teil B) --- + # --- WP-22: Edge Registry Validation --- edges = [] if raw_edges: for edge in raw_edges: diff --git a/app/core/retriever.py b/app/core/retriever.py index fe19b62..05fc309 100644 --- a/app/core/retriever.py +++ b/app/core/retriever.py @@ -98,7 +98,7 @@ def _semantic_hits( results.append((str(pid), float(score), dict(payload or {}))) return results -# --- WP-22 Helper: Lifecycle Multipliers (Teil A) --- +# --- WP-22 Helper: Lifecycle Multipliers --- def _get_status_multiplier(payload: Dict[str, Any]) -> float: """ WP-22: Drafts werden bestraft, Stable Notes belohnt. @@ -106,11 +106,10 @@ def _get_status_multiplier(payload: Dict[str, Any]) -> float: status = str(payload.get("status", "draft")).lower() if status == "stable": return 1.2 if status == "active": return 1.0 - if status == "draft": return 0.5 # Malus für Entwürfe + if status == "draft": return 0.8 # Malus für Entwürfe # Fallback für andere oder leere Status return 1.0 -# --- WP-22: Dynamic Scoring Formula (Teil C) --- def _compute_total_score( semantic_score: float, payload: Dict[str, Any], @@ -119,8 +118,8 @@ def _compute_total_score( dynamic_edge_boosts: Dict[str, float] = None ) -> Tuple[float, float, float]: """ - Berechnet total_score nach WP-22 Formel. - Score = (Sem * Type * Status) + (Weighted_Edge + Cent) + Berechnet total_score. + WP-22 Update: Integration von Status-Bonus und Dynamic Edge Boosts. """ raw_weight = payload.get("retriever_weight", 1.0) try: @@ -133,13 +132,13 @@ def _compute_total_score( sem_w, edge_w, cent_w = _get_scoring_weights() status_mult = _get_status_multiplier(payload) - # Dynamic Edge Boosting (Teil C) - # Wenn dynamische Boosts aktiv sind (durch den Router), verstärken wir den Graph-Bonus global. - # Der konkrete kanten-spezifische Boost passiert bereits im Subgraph (hybrid_retrieve). + # Dynamic Edge Boosting + # Wenn dynamische Boosts aktiv sind, erhöhen wir den Einfluss des Graphen + # Dies ist eine Vereinfachung, da der echte Boost im Subgraph passiert sein sollte. final_edge_score = edge_w * edge_bonus if dynamic_edge_boosts and edge_bonus > 0: - # Globaler Boost-Faktor falls Intention (z.B. WHY) vorliegt - final_edge_score *= 1.5 + # Globaler Boost für Graph-Signale bei spezifischen Intents + final_edge_score *= 1.2 total = (sem_w * float(semantic_score) * weight * status_mult) + final_edge_score + (cent_w * cent_bonus) return float(total), float(edge_bonus), float(cent_bonus) @@ -155,8 +154,9 @@ def _build_explanation( subgraph: Optional[ga.Subgraph], node_key: Optional[str] ) -> Explanation: - """Erstellt ein Explanation-Objekt (WP-04b).""" + """Erstellt ein Explanation-Objekt.""" sem_w, _edge_w, _cent_w = _get_scoring_weights() + # Scoring weights erneut laden für Reason-Details _, edge_w_cfg, cent_w_cfg = _get_scoring_weights() try: @@ -167,7 +167,6 @@ def _build_explanation( status_mult = _get_status_multiplier(payload) note_type = payload.get("type", "unknown") - # Breakdown Berechnung (muss mit _compute_total_score korrelieren) breakdown = ScoreBreakdown( semantic_contribution=(sem_w * semantic_score * type_weight * status_mult), edge_contribution=(edge_w_cfg * edge_bonus), @@ -181,7 +180,6 @@ def _build_explanation( reasons: List[Reason] = [] edges_dto: List[EdgeDTO] = [] - # Reason Generation Logik (WP-04b) if semantic_score > 0.85: reasons.append(Reason(kind="semantic", message="Sehr hohe textuelle Übereinstimmung.", score_impact=breakdown.semantic_contribution)) elif semantic_score > 0.70: @@ -191,13 +189,11 @@ def _build_explanation( msg = "Bevorzugt" if type_weight > 1.0 else "Leicht abgewertet" reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Typs '{note_type}'.", score_impact=(sem_w * semantic_score * (type_weight - 1.0)))) - # NEU: WP-22 Status Reason if status_mult != 1.0: msg = "Status-Bonus" if status_mult > 1.0 else "Status-Malus" reasons.append(Reason(kind="lifecycle", message=f"{msg} ({payload.get('status')}).", score_impact=0.0)) if subgraph and node_key and edge_bonus > 0: - # Extrahiere Top-Kanten für die Erklärung if hasattr(subgraph, "get_outgoing_edges"): outgoing = subgraph.get_outgoing_edges(node_key) for edge in outgoing: @@ -230,7 +226,7 @@ def _build_explanation( def _extract_expand_options(req: QueryRequest) -> Tuple[int, List[str] | None]: - """Extrahiert depth und edge_types für Graph-Expansion.""" + """Extrahiert depth und edge_types.""" expand = getattr(req, "expand", None) if not expand: return 0, None @@ -263,7 +259,7 @@ def _build_hits_from_semantic( explain: bool = False, dynamic_edge_boosts: Dict[str, float] = None ) -> QueryResponse: - """Baut strukturierte QueryHits basierend auf Scoring (WP-22 & WP-04b).""" + """Baut strukturierte QueryHits.""" t0 = time.time() enriched: List[Tuple[str, float, Dict[str, Any], float, float, float]] = [] @@ -282,28 +278,27 @@ def _build_hits_from_semantic( except Exception: cent_bonus = 0.0 - total, eb, cb = _compute_total_score( + total, edge_bonus, cent_bonus = _compute_total_score( semantic_score, payload, edge_bonus=edge_bonus, cent_bonus=cent_bonus, dynamic_edge_boosts=dynamic_edge_boosts ) - enriched.append((pid, float(semantic_score), payload, total, eb, cb)) + enriched.append((pid, float(semantic_score), payload, total, edge_bonus, cent_bonus)) - # Sort & Limit enriched_sorted = sorted(enriched, key=lambda h: h[3], reverse=True) limited = enriched_sorted[: max(1, top_k)] results: List[QueryHit] = [] - for pid, semantic_score, payload, total, eb, cb in limited: + for pid, semantic_score, payload, total, edge_bonus, cent_bonus in limited: explanation_obj = None if explain: explanation_obj = _build_explanation( semantic_score=float(semantic_score), payload=payload, - edge_bonus=eb, - cent_bonus=cb, + edge_bonus=edge_bonus, + cent_bonus=cent_bonus, subgraph=subgraph, node_key=payload.get("chunk_id") or payload.get("note_id") ) @@ -312,10 +307,10 @@ def _build_hits_from_semantic( results.append(QueryHit( node_id=str(pid), - note_id=payload.get("note_id", "unknown"), + note_id=payload.get("note_id"), semantic_score=float(semantic_score), - edge_bonus=eb, - centrality_bonus=cb, + edge_bonus=edge_bonus, + centrality_bonus=cent_bonus, total_score=total, paths=None, source={ @@ -332,7 +327,7 @@ def _build_hits_from_semantic( def semantic_retrieve(req: QueryRequest) -> QueryResponse: - """Reiner semantischer Retriever (WP-02).""" + """Reiner semantischer Retriever.""" client, prefix = _get_client_and_prefix() vector = _get_query_vector(req) top_k = req.top_k or get_settings().RETRIEVER_TOP_K @@ -342,44 +337,44 @@ def semantic_retrieve(req: QueryRequest) -> QueryResponse: def hybrid_retrieve(req: QueryRequest) -> QueryResponse: - """Hybrid-Retriever: semantische Suche + optionale Edge-Expansion (WP-04a).""" + """Hybrid-Retriever: semantische Suche + optionale Edge-Expansion.""" client, prefix = _get_client_and_prefix() - - # 1. Semantische Suche - vector = list(req.query_vector) if req.query_vector else _get_query_vector(req) + if req.query_vector: + vector = list(req.query_vector) + else: + vector = _get_query_vector(req) + top_k = req.top_k or get_settings().RETRIEVER_TOP_K hits = _semantic_hits(client, prefix, vector, top_k=top_k, filters=req.filters) - # 2. Graph Expansion & Custom Boosting (WP-22 Teil C) depth, edge_types = _extract_expand_options(req) + + # WP-22: Dynamic Boosts aus dem Request (vom Router) boost_edges = getattr(req, "boost_edges", {}) subgraph: ga.Subgraph | None = None if depth and depth > 0: seed_ids: List[str] = [] for _pid, _score, payload in hits: - key = payload.get("note_id") + key = payload.get("chunk_id") or payload.get("note_id") if key and key not in seed_ids: seed_ids.append(key) - if seed_ids: try: - # Subgraph laden + # Hier könnten wir boost_edges auch an expand übergeben, wenn ga.expand es unterstützt subgraph = ga.expand(client, prefix, seed_ids, depth=depth, edge_types=edge_types) - # --- WP-22: Kanten-Boosts im RAM-Graphen anwenden --- - # Dies manipuliert die Gewichte im Graphen, bevor der 'edge_bonus' berechnet wird. + # Manuelles Boosten der Kantengewichte im Graphen falls aktiv if boost_edges and subgraph and hasattr(subgraph, "graph"): for u, v, data in subgraph.graph.edges(data=True): k = data.get("kind") if k in boost_edges: - # Gewicht multiplizieren (z.B. caused_by * 3.0) + # Gewicht erhöhen für diesen Query-Kontext data["weight"] = data.get("weight", 1.0) * boost_edges[k] except Exception: subgraph = None - # 3. Scoring & Re-Ranking return _build_hits_from_semantic( hits, top_k=top_k, @@ -391,6 +386,11 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse: class Retriever: - """Wrapper-Klasse für Suchoperationen.""" + """ + Wrapper-Klasse für WP-05 (Chat). + """ + def __init__(self): + pass + async def search(self, request: QueryRequest) -> QueryResponse: return hybrid_retrieve(request) \ No newline at end of file diff --git a/app/services/edge_registry.py b/app/services/edge_registry.py index b58d1b3..be63332 100644 --- a/app/services/edge_registry.py +++ b/app/services/edge_registry.py @@ -2,7 +2,7 @@ FILE: app/services/edge_registry.py DESCRIPTION: Single Source of Truth für Kanten-Typen. Parst '01_User_Manual/01_edge_vocabulary.md'. WP-22 Teil B: Registry & Validation. - Beachtet den dynamischen Vault-Root aus ENV oder Parameter. + FIX: Dynamische Pfad-Auflösung basierend auf MINDNET_VAULT_ROOT. """ import re import os @@ -25,7 +25,7 @@ class EdgeRegistry: if self.initialized: return - # Priorität: 1. Parameter -> 2. ENV -> 3. Default + # Priorität: 1. Parameter (Test) -> 2. ENV (dotenv) -> 3. Default self.vault_root = vault_root or os.getenv("MINDNET_VAULT_ROOT", "./vault") self.vocab_rel_path = os.path.join("01_User_Manual", "01_edge_vocabulary.md") self.unknown_log_path = "data/logs/unknown_edges.jsonl" @@ -38,9 +38,11 @@ class EdgeRegistry: def _load_vocabulary(self): """Parst die Markdown-Tabelle im Vault.""" + # Absoluten Pfad auflösen, um Verwechslungen im venv zu vermeiden full_path = os.path.abspath(os.path.join(self.vault_root, self.vocab_rel_path)) if not os.path.exists(full_path): + # Debug-Info: Zeige wo genau gesucht wurde logger.warning(f"Edge Vocabulary NOT found at: {full_path}. Registry is empty.") return