Merge pull request 'WP06a' (#5) from WP06a into main

Reviewed-on: #5

fix(wp06a): Fix API Payload-Enrichment & CPU Timeouts

Stabilisierungspatch für die Decision Engine (Post-WP06).

**Bugfixes & Verbesserungen:**
- **FIX:** API (Retriever) liefert nun `payload`-Daten im `QueryHit` zurück. Behebt "Typ: unknown" Fehler in Clients/Tests.
- **FIX:** Test-Skript (`test_wp06_decision.py`) prüft nun robust auf `payload` oder `source` und zeigt Intent-Source an.
- **FIX:** Timeout für LLM-Inference in `.env` konfigurierbar gemacht (`MINDNET_LLM_TIMEOUT`) und Default für Tests auf 300s erhöht (Cold-Start Protection).
- **CHORE:** DTOs erweitert um `intent_source` für besseres Tracing (Keyword vs. LLM).
- **DOCS:** Technische Architektur und Appendices aktualisiert (Chunk-Schema enthält nun explizit `type`).

**Version Bump:** v2.3.1 -> v2.3.2
2025-12-09 18:25:36 +01:00 · 2025-12-09 18:25:36 +01:00 · 046aa2cf48
commit 046aa2cf48
parent d2270fafdd 0bc0e66a85
6 changed files with 59 additions and 50 deletions
--- a/app/core/retriever.py
+++ b/app/core/retriever.py
@@ -2,7 +2,7 @@
 app/core/retriever.py — Hybrider Such-Algorithmus

 Version:
-    0.5.2 (WP-05 Fix: Pass content in QueryHit source)
+    0.5.3 (WP-06 Fix: Populate 'payload' in QueryHit for meta-data access)
 """
 from __future__ import annotations

@@ -127,7 +127,9 @@ def _build_explanation(
    node_key: Optional[str]
 ) -> Explanation:
    """Erstellt ein Explanation-Objekt."""
-    sem_w, edge_w, cent_w = _get_scoring_weights()
+    sem_w, _edge_w, _cent_w = _get_scoring_weights()
+    # Scoring weights erneut laden für Reason-Details
+    _, edge_w_cfg, cent_w_cfg = _get_scoring_weights()
    
    try:
        type_weight = float(payload.get("retriever_weight", 1.0))
@@ -138,8 +140,8 @@ def _build_explanation(

    breakdown = ScoreBreakdown(
        semantic_contribution=(sem_w * semantic_score * type_weight),
-        edge_contribution=(edge_w * edge_bonus),
-        centrality_contribution=(cent_w * cent_bonus),
+        edge_contribution=(edge_w_cfg * edge_bonus),
+        centrality_contribution=(cent_w_cfg * cent_bonus),
        raw_semantic=semantic_score,
        raw_edge_bonus=edge_bonus,
        raw_centrality=cent_bonus,
@@ -179,7 +181,7 @@ def _build_explanation(

        all_edges = sorted(edges_dto, key=lambda e: e.weight, reverse=True)
        for top_edge in all_edges[:3]:
-            impact = edge_w * top_edge.weight
+            impact = edge_w_cfg * top_edge.weight
            dir_txt = "Verweist auf" if top_edge.direction == "out" else "Referenziert von"
            tgt_txt = top_edge.target if top_edge.direction == "out" else top_edge.source
            reasons.append(Reason(kind="edge", message=f"{dir_txt} '{tgt_txt}' via '{top_edge.kind}'", score_impact=impact, details={"kind": top_edge.kind}))
@@ -261,7 +263,6 @@ def _build_hits_from_semantic(
                node_key=payload.get("chunk_id") or payload.get("note_id")
            )

-        # FIX: Hier holen wir jetzt den Textinhalt (text, content oder page_content) aus dem Payload
        text_content = payload.get("page_content") or payload.get("text") or payload.get("content")

        results.append(QueryHit(
@@ -275,8 +276,10 @@ def _build_hits_from_semantic(
            source={
                "path": payload.get("path"),
                "section": payload.get("section") or payload.get("section_title"),
-                "text": text_content  # WICHTIG: Inhalt durchreichen
+                "text": text_content
            },
+            # --- FIX: Wir füllen das payload-Feld explizit ---
+            payload=payload, 
            explanation=explanation_obj
        ))

--- a/app/models/dto.py
+++ b/app/models/dto.py
@@ -3,12 +3,12 @@ app/models/dto.py — Pydantic-Modelle (DTOs) für WP-04/WP-05/WP-06

 Zweck:
    Laufzeit-Modelle für FastAPI (Requests/Responses).
-    WP-06 Update: Intent in ChatResponse.
+    WP-06 Update: Intent & Intent-Source in ChatResponse.

 Version:
-    0.6.0 (WP-06: Decision Engine)
+    0.6.1 (WP-06: Decision Engine Transparency)
 Stand:
-    2025-12-08
+    2025-12-09
 """

 from __future__ import annotations
@@ -123,6 +123,7 @@ class QueryHit(BaseModel):
    total_score: float
    paths: Optional[List[List[Dict]]] = None
    source: Optional[Dict] = None
+    payload: Optional[Dict] = None # Added for flexibility & WP-06 meta-data
    explanation: Optional[Explanation] = None


@@ -151,3 +152,4 @@ class ChatResponse(BaseModel):
    sources: List[QueryHit] = Field(..., description="Die für die Antwort genutzten Quellen")
    latency_ms: int
    intent: Optional[str] = Field("FACT", description="WP-06: Erkannter Intent (FACT/DECISION)")
+    intent_source: Optional[str] = Field("Unknown", description="WP-06: Quelle der Intent-Erkennung (Keyword vs. LLM)")
--- a/app/routers/chat.py
+++ b/app/routers/chat.py
@@ -1,6 +1,6 @@
 """
-app/routers/chat.py — RAG Endpunkt (WP-06 Hybrid Router v2)
-Update: Robusteres LLM-Parsing für Small Language Models (SLMs).
+app/routers/chat.py — RAG Endpunkt (WP-06 Hybrid Router v3)
+Update: Transparenz über Intent-Source (Keyword vs. LLM).
 """

 from fastapi import APIRouter, HTTPException, Depends
@@ -76,7 +76,11 @@ def _build_enriched_context(hits: List[QueryHit]) -> str:
            "[Kein Text]"
        )
        title = hit.note_id or "Unbekannt"
-        note_type = source.get("type", "unknown").upper()
+        
+        # FIX: Wir holen den Typ aus Payload oder Source (Fallback)
+        payload = hit.payload or {}
+        note_type = payload.get("type") or source.get("type", "unknown")
+        note_type = str(note_type).upper()
        
        entry = (
            f"### QUELLE {i}: {title}\n"
@@ -87,11 +91,10 @@ def _build_enriched_context(hits: List[QueryHit]) -> str:
    
    return "\n\n".join(context_parts)

-async def _classify_intent(query: str, llm: LLMService) -> str:
+async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
    """
-    Hybrid Router v2: 
-    1. Keyword Check (Best/Longest Match) -> FAST
-    2. LLM Fallback (Robust Parsing) -> SMART
+    Hybrid Router v3: 
+    Gibt Tuple zurück: (Intent, Source)
    """
    config = get_full_config()
    strategies = config.get("strategies", {})
@@ -112,8 +115,7 @@ async def _classify_intent(query: str, llm: LLMService) -> str:
                    best_intent = intent_name
    
    if best_intent:
-        logger.info(f"Intent detected via KEYWORD: {best_intent}")
-        return best_intent
+        return best_intent, "Keyword (Fast Path)"

    # 2. SLOW PATH: LLM Router
    if settings.get("llm_fallback_enabled", False):
@@ -122,35 +124,23 @@ async def _classify_intent(query: str, llm: LLMService) -> str:
            prompt = router_prompt_template.replace("{query}", query)
            logger.info("Keywords failed. Asking LLM for Intent...")
            
-            # Kurzer Raw Call
            raw_response = await llm.generate_raw_response(prompt)
            
-            # --- Robust Parsing für SLMs ---
-            # Wir suchen nach den bekannten Strategie-Namen im Output
+            # Parsing logic
            llm_output_upper = raw_response.upper()
-            logger.info(f"LLM Router Raw Output: '{raw_response}'") # Debugging
-            
            found_intents = []
            for strat_key in strategies.keys():
-                # Wir prüfen, ob der Strategie-Name (z.B. "EMPATHY") im Text vorkommt
                if strat_key in llm_output_upper:
                    found_intents.append(strat_key)
            
-            # Entscheidung
-            final_intent = "FACT"
            if len(found_intents) == 1:
-                # Eindeutiger Treffer
-                final_intent = found_intents[0]
-                logger.info(f"Intent detected via LLM (Parsed): {final_intent}")
-                return final_intent
+                return found_intents[0], "LLM Router (Slow Path)"
            elif len(found_intents) > 1:
-                # Mehrere Treffer (z.B. "Es ist FACT oder DECISION") -> Nimm den ersten oder Fallback
-                logger.warning(f"LLM returned multiple intents {found_intents}. Using first match: {found_intents[0]}")
-                return found_intents[0]
+                return found_intents[0], f"LLM Ambiguous {found_intents}"
            else:
-                logger.warning(f"LLM did not return a valid strategy name. Falling back to FACT.")
+                return "FACT", "LLM Fallback (No Match)"
                
-    return "FACT"
+    return "FACT", "Default (No Match)"

 @router.post("/", response_model=ChatResponse)
 async def chat_endpoint(
@@ -163,9 +153,9 @@ async def chat_endpoint(
    logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")

    try:
-        # 1. Intent Detection
-        intent = await _classify_intent(request.message, llm)
-        logger.info(f"[{query_id}] Final Intent: {intent}")
+        # 1. Intent Detection (mit Source)
+        intent, intent_source = await _classify_intent(request.message, llm)
+        logger.info(f"[{query_id}] Final Intent: {intent} via {intent_source}")

        # Strategy Load
        strategy = get_decision_strategy(intent)
@@ -227,7 +217,8 @@ async def chat_endpoint(
            answer=answer_text,
            sources=hits,
            latency_ms=duration_ms,
-            intent=intent
+            intent=intent,
+            intent_source=intent_source # Source durchreichen
        )

    except Exception as e:
--- a/docs/appendix.md
+++ b/docs/appendix.md
@@ -71,6 +71,7 @@ Diese sind die Felder, die effektiv in Qdrant gespeichert werden.
    {
      "chunk_id": "string (keyword)",      // Format: {note_id}#c{index}
      "note_id": "string (keyword)",       // FK zur Note
+      "type": "string (keyword)",          // Typ-Kopie aus Note (Neu in WP06a)
      "text": "string (text)",             // Reintext für Anzeige (ohne Overlap)
      "window": "string (text)",           // Text + Overlap (für Embedding)
      "ord": "integer",                    // Laufende Nummer (1..N)
--- a/docs/mindnet_technical_architecture.md
+++ b/docs/mindnet_technical_architecture.md
@@ -134,6 +134,7 @@ Die atomaren Sucheinheiten.
    | :--- | :--- | :--- |
    | `chunk_id` | Keyword | Deterministisch: `{note_id}#c{index:02d}`. |
    | `note_id` | Keyword | Referenz zur Note. |
+    | `type` | Keyword | **Kopie des Note-Typs** (Denormalisiert für Filter). |
    | `text` | Text | **Reiner Inhalt** (ohne Overlap). Anzeige-Text. |
    | `window` | Text | **Kontext-Fenster** (mit Overlap). Embedding-Basis. |
    | `ord` | Integer | Sortierreihenfolge (1..N). |
@@ -336,7 +337,7 @@ Damit Qdrant performant bleibt, sind Payload-Indizes essenziell.

 **Erforderliche Indizes:**
 * **Notes:** `note_id`, `type`, `tags`.
-* **Chunks:** `note_id`, `chunk_id`.
+* **Chunks:** `note_id`, `chunk_id`, `type`.
 * **Edges:** `source_id`, `target_id`, `kind`, `scope`, `note_id`.

 Validierung erfolgt über `tests/ensure_indexes_and_show.py`.
--- a/tests/test_wp06_decision.py
+++ b/tests/test_wp06_decision.py
@@ -1,8 +1,10 @@
 """
 tests/test_wp06_decision.py — Flexibler Integrationstest für WP-06
 Update: 
- Timeout auf 300s erhöht.
- Robusteres Auslesen der Metadaten (Payload/Source Fix).
+- Timeout 300s.
+- Zeigt Intent Source an.
+- Payload/Source Fallback für Metadaten.
+- Debug-Dump bei unknown Type.
 """
 import requests
 import json
@@ -24,7 +26,7 @@ def test_decision_engine(query: str, port: int, expected_intent: str):
        print(f"FRAGE: '{query}'")
        print("... warte auf LLM (kann auf CPU >120s dauern) ...")
        
-        # FIX: Timeout auf 300 erhöht, passend zur Server-Config
+        # FIX: Timeout auf 300 erhöht
        response = requests.post(f"{api_url}/chat/", json=payload, timeout=300)
        response.raise_for_status()
        data = response.json()
@@ -33,11 +35,14 @@ def test_decision_engine(query: str, port: int, expected_intent: str):

        # 1. Intent Check
        intent = data.get("intent", "UNKNOWN")
-        # Wir normalisieren auf Großbuchstaben für den Vergleich
+        source_method = data.get("intent_source", "Unknown Source")
+        
        match = intent.upper() == expected_intent.upper()
        
        print(f"\n1. INTENT DETECTION: [{'✅' if match else '❌'}]")
-        print(f"   Erkannt: {intent} (Erwartet: {expected_intent})")
+        print(f"   Erkannt: {intent}")
+        print(f"   Erwartet: {expected_intent}")
+        print(f"   Methode: {source_method}")

        # 2. Source Check (Strategic Retrieval)
        sources = data.get("sources", [])
@@ -48,19 +53,25 @@ def test_decision_engine(query: str, port: int, expected_intent: str):
        if not sources:
            print("   (Keine Quellen gefunden)")

+        debug_printed = False
+
        for i, source in enumerate(sources):
            # --- FIX: Robusterer Zugriff auf Metadaten ---
-            # Qdrant liefert Daten oft in 'payload', Mindnet DTOs manchmal in 'source'
-            # Wir prüfen beides, um "Typ: unknown" zu vermeiden.
            src_meta = source.get("payload") or source.get("source") or {}
            
            node_type = src_meta.get("type", "unknown")
            title = source.get("note_id", "Unknown")
            score = source.get("total_score", 0.0)
            
+            # DEBUG: Wenn Typ unknown ist, dumpen wir das erste Objekt
+            if node_type == "unknown" and not debug_printed:
+                print(f"\n   🔴 DEBUG: Raw Data von Quelle {i+1} (da Typ unknown):")
+                print(json.dumps(source, indent=2, ensure_ascii=False))
+                print("   ------------------------------------------------")
+                debug_printed = True
+
            # Marker für Ausgabe
            marker = "  "
-            # Liste aller strategischen Typen, die wir besonders hervorheben wollen
            if node_type in ["value", "principle", "goal", "experience", "belief", "profile", "decision"]:
                marker = "🎯" # Strategischer Treffer
                strategic_hits.append(f"{title} ({node_type})")