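Bug fix: fall back when ec.embed_text rejects the 'model_name' keyword (TypeError); bump retriever to 0.6.15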

2025-12-18 17:15:43 +01:00 · 2025-12-18 17:15:43 +01:00 · ba46957556
commit ba46957556
parent 33b0c83c87
2 changed files with 30 additions and 12 deletions
--- a/app/core/retriever.py
+++ b/app/core/retriever.py
@@ -2,7 +2,9 @@
 FILE: app/core/retriever.py
 DESCRIPTION: Haupt-Schnittstelle für die Suche. Orchestriert Vektorsuche und Graph-Expansion.
             Nutzt retriever_scoring.py für die WP-22 Logik.
-VERSION: 0.6.14 (WP-22 Full, Debug & Stable)
+             FIX: TypeError in embed_text (model_name) behoben.
+             FIX: Pydantic ValidationError (Target/Source) behoben.
+VERSION: 0.6.15 (WP-22 Full & Stable)
 STATUS: Active
 DEPENDENCIES: app.config, app.models.dto, app.core.qdrant*, app.core.graph_adapter, app.core.retriever_scoring
 """
@@ -28,22 +30,36 @@ from app.core.retriever_scoring import get_weights, compute_wp22_score

 logger = logging.getLogger(__name__)

-# --- Hilfsfunktionen für Qdrant ---
+# ==============================================================================
+# 1. CORE HELPERS & CONFIG LOADERS
+# ==============================================================================

 def _get_client_and_prefix() -> Tuple[Any, str]:
    """Initialisiert Qdrant Client und lädt Collection-Prefix."""
    cfg = qdr.QdrantConfig.from_env()
    return qdr.get_client(cfg), cfg.prefix

+
 def _get_query_vector(req: QueryRequest) -> List[float]:
-    """Vektorisiert die Anfrage oder nutzt vorhandenen Vektor."""
+    """
+    Vektorisiert die Anfrage. 
+    FIX: Enthält try-except Block für unterschiedliche Signaturen von ec.embed_text.
+    """
    if req.query_vector:
        return list(req.query_vector)
    if not req.query:
        raise ValueError("Kein Text oder Vektor für die Suche angegeben.")
    
    settings = get_settings()
-    return ec.embed_text(req.query, model_name=settings.MODEL_NAME)
+    
+    try:
+        # Versuch mit modernem Interface (WP-03 kompatibel)
+        return ec.embed_text(req.query, model_name=settings.MODEL_NAME)
+    except TypeError:
+        # Fallback für Signaturen, die 'model_name' nicht als Keyword akzeptieren
+        logger.debug("ec.embed_text does not accept 'model_name' keyword. Falling back.")
+        return ec.embed_text(req.query)
+

 def _semantic_hits(
    client: Any, 
@@ -57,7 +73,9 @@ def _semantic_hits(
    # Strikte Typkonvertierung für Stabilität
    return [(str(hit[0]), float(hit[1]), dict(hit[2] or {})) for hit in raw_hits]

-# --- Explanation Layer (Detaillierte Begründungen) ---
+# ==============================================================================
+# 2. EXPLANATION LAYER (DEBUG & VERIFIABILITY)
+# ==============================================================================

 def _build_explanation(
    semantic_score: float,
@@ -100,7 +118,7 @@ def _build_explanation(
    type_weight = float(payload.get("retriever_weight", 1.0))
    if type_weight != 1.0:
        msg = "Bevorzugt" if type_weight > 1.0 else "De-priorisiert"
-        reasons.append(Reason(kind="type", message=f"{msg} aufgrund des Notiz-Typs.", score_impact=base_val * (type_weight - 1.0)))
+        reasons.append(Reason(kind="type", message=f"{msg} durch Typ-Profil.", score_impact=base_val * (type_weight - 1.0)))

    # 4. Kanten-Verarbeitung (Graph-Intelligence)
    if subgraph and target_note_id and scoring_debug["edge_bonus"] > 0:
@@ -155,7 +173,9 @@ def _build_explanation(
        applied_boosts=applied_boosts
    )

-# --- Kern-Logik für Hybrid-Retrieval ---
+# ==============================================================================
+# 3. CORE RETRIEVAL PIPELINE
+# ==============================================================================

 def _build_hits_from_semantic(
    hits: Iterable[Tuple[str, float, Dict[str, Any]]],
@@ -171,7 +191,6 @@ def _build_hits_from_semantic(

    for pid, semantic_score, payload in hits:
        edge_bonus, cent_bonus = 0.0, 0.0
-        # Graph-Abfrage erfolgt IMMER über die Note-ID, nicht Chunk-ID
        target_id = payload.get("note_id")
        
        if subgraph and target_id:
@@ -269,7 +288,7 @@ def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
                        data["weight"] = data.get("weight", 1.0) * prov_w * intent_multiplier

            except Exception as e:
-                logger.error(f"Graph Expansion failed criticaly: {e}", exc_info=True)
+                logger.error(f"Graph Expansion failed: {e}")
                subgraph = None

    # 3. Scoring & Explanation Generierung
@@ -287,6 +306,5 @@ def semantic_retrieve(req: QueryRequest) -> QueryResponse:
 class Retriever:
    """Schnittstelle für die asynchrone Suche."""
    async def search(self, request: QueryRequest) -> QueryResponse:
-        """Führt eine Suche durch. Nutzt hybrid_retrieve als Standard."""
-        # Standard ist Hybrid-Modus
+        """Führt eine hybride Suche aus."""
        return hybrid_retrieve(request)
--- a/app/core/retriever_scoring.py
+++ b/app/core/retriever_scoring.py
@@ -101,7 +101,7 @@ def compute_wp22_score(
    cent_impact_final = (cent_w_cfg * cent_bonus_raw) * graph_boost_factor
    
    # 4. Finales Zusammenführen (Merging)
-    # node_weight - 1.0 sorgt dafür, dass ein Gewicht von 1.0 keinen Einfluss hat (neutral).
+    # (node_weight - 1.0) sorgt dafür, dass ein Gewicht von 1.0 keinen Einfluss hat (neutral).
    total = base_val * (1.0 + (node_weight - 1.0) + edge_impact_final + cent_impact_final)
    
    # Sicherstellen, dass der Score niemals 0 oder negativ ist (Floor)