diff --git a/app/core/retriever.py b/app/core/retriever.py index 733ab6d..234d32c 100644 --- a/app/core/retriever.py +++ b/app/core/retriever.py @@ -2,7 +2,7 @@ app/core/retriever.py — Hybrider Such-Algorithmus Version: - 0.5.2 (WP-05 Fix: Pass content in QueryHit source) + 0.5.3 (WP-06 Fix: Populate 'payload' in QueryHit for meta-data access) """ from __future__ import annotations @@ -127,7 +127,9 @@ def _build_explanation( node_key: Optional[str] ) -> Explanation: """Erstellt ein Explanation-Objekt.""" - sem_w, edge_w, cent_w = _get_scoring_weights() + sem_w, _edge_w, _cent_w = _get_scoring_weights() + # Scoring weights erneut laden für Reason-Details + _, edge_w_cfg, cent_w_cfg = _get_scoring_weights() try: type_weight = float(payload.get("retriever_weight", 1.0)) @@ -138,8 +140,8 @@ def _build_explanation( breakdown = ScoreBreakdown( semantic_contribution=(sem_w * semantic_score * type_weight), - edge_contribution=(edge_w * edge_bonus), - centrality_contribution=(cent_w * cent_bonus), + edge_contribution=(edge_w_cfg * edge_bonus), + centrality_contribution=(cent_w_cfg * cent_bonus), raw_semantic=semantic_score, raw_edge_bonus=edge_bonus, raw_centrality=cent_bonus, @@ -179,7 +181,7 @@ def _build_explanation( all_edges = sorted(edges_dto, key=lambda e: e.weight, reverse=True) for top_edge in all_edges[:3]: - impact = edge_w * top_edge.weight + impact = edge_w_cfg * top_edge.weight dir_txt = "Verweist auf" if top_edge.direction == "out" else "Referenziert von" tgt_txt = top_edge.target if top_edge.direction == "out" else top_edge.source reasons.append(Reason(kind="edge", message=f"{dir_txt} '{tgt_txt}' via '{top_edge.kind}'", score_impact=impact, details={"kind": top_edge.kind})) @@ -261,7 +263,6 @@ def _build_hits_from_semantic( node_key=payload.get("chunk_id") or payload.get("note_id") ) - # FIX: Hier holen wir jetzt den Textinhalt (text, content oder page_content) aus dem Payload text_content = payload.get("page_content") or payload.get("text") or payload.get("content") results.append(QueryHit( @@ -275,8 +276,10 @@ def _build_hits_from_semantic( source={ "path": payload.get("path"), "section": payload.get("section") or payload.get("section_title"), - "text": text_content # WICHTIG: Inhalt durchreichen + "text": text_content }, + # --- FIX: Wir füllen das payload-Feld explizit --- + payload=payload, explanation=explanation_obj )) diff --git a/app/models/dto.py b/app/models/dto.py index c0e928c..85767f8 100644 --- a/app/models/dto.py +++ b/app/models/dto.py @@ -3,12 +3,12 @@ app/models/dto.py — Pydantic-Modelle (DTOs) für WP-04/WP-05/WP-06 Zweck: Laufzeit-Modelle für FastAPI (Requests/Responses). - WP-06 Update: Intent in ChatResponse. + WP-06 Update: Intent & Intent-Source in ChatResponse. Version: - 0.6.0 (WP-06: Decision Engine) + 0.6.1 (WP-06: Decision Engine Transparency) Stand: - 2025-12-08 + 2025-12-09 """ from __future__ import annotations @@ -123,6 +123,7 @@ class QueryHit(BaseModel): total_score: float paths: Optional[List[List[Dict]]] = None source: Optional[Dict] = None + payload: Optional[Dict] = None # Added for flexibility & WP-06 meta-data explanation: Optional[Explanation] = None @@ -150,4 +151,5 @@ class ChatResponse(BaseModel): answer: str = Field(..., description="Generierte Antwort vom LLM") sources: List[QueryHit] = Field(..., description="Die für die Antwort genutzten Quellen") latency_ms: int - intent: Optional[str] = Field("FACT", description="WP-06: Erkannter Intent (FACT/DECISION)") \ No newline at end of file + intent: Optional[str] = Field("FACT", description="WP-06: Erkannter Intent (FACT/DECISION)") + intent_source: Optional[str] = Field("Unknown", description="WP-06: Quelle der Intent-Erkennung (Keyword vs. LLM)") \ No newline at end of file diff --git a/app/routers/chat.py b/app/routers/chat.py index 0e01dc6..58f5c82 100644 --- a/app/routers/chat.py +++ b/app/routers/chat.py @@ -1,6 +1,6 @@ """ -app/routers/chat.py — RAG Endpunkt (WP-06 Hybrid Router v2) -Update: Robusteres LLM-Parsing für Small Language Models (SLMs). +app/routers/chat.py — RAG Endpunkt (WP-06 Hybrid Router v3) +Update: Transparenz über Intent-Source (Keyword vs. LLM). """ from fastapi import APIRouter, HTTPException, Depends @@ -76,7 +76,11 @@ def _build_enriched_context(hits: List[QueryHit]) -> str: "[Kein Text]" ) title = hit.note_id or "Unbekannt" - note_type = source.get("type", "unknown").upper() + + # FIX: Wir holen den Typ aus Payload oder Source (Fallback) + payload = hit.payload or {} + note_type = payload.get("type") or source.get("type", "unknown") + note_type = str(note_type).upper() entry = ( f"### QUELLE {i}: {title}\n" @@ -87,11 +91,10 @@ def _build_enriched_context(hits: List[QueryHit]) -> str: return "\n\n".join(context_parts) -async def _classify_intent(query: str, llm: LLMService) -> str: +async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]: """ - Hybrid Router v2: - 1. Keyword Check (Best/Longest Match) -> FAST - 2. LLM Fallback (Robust Parsing) -> SMART + Hybrid Router v3: + Gibt Tuple zurück: (Intent, Source) """ config = get_full_config() strategies = config.get("strategies", {}) @@ -112,8 +115,7 @@ async def _classify_intent(query: str, llm: LLMService) -> str: best_intent = intent_name if best_intent: - logger.info(f"Intent detected via KEYWORD: {best_intent}") - return best_intent + return best_intent, "Keyword (Fast Path)" # 2. SLOW PATH: LLM Router if settings.get("llm_fallback_enabled", False): @@ -122,35 +124,23 @@ async def _classify_intent(query: str, llm: LLMService) -> str: prompt = router_prompt_template.replace("{query}", query) logger.info("Keywords failed. Asking LLM for Intent...") - # Kurzer Raw Call raw_response = await llm.generate_raw_response(prompt) - # --- Robust Parsing für SLMs --- - # Wir suchen nach den bekannten Strategie-Namen im Output + # Parsing logic llm_output_upper = raw_response.upper() - logger.info(f"LLM Router Raw Output: '{raw_response}'") # Debugging - found_intents = [] for strat_key in strategies.keys(): - # Wir prüfen, ob der Strategie-Name (z.B. "EMPATHY") im Text vorkommt if strat_key in llm_output_upper: found_intents.append(strat_key) - # Entscheidung - final_intent = "FACT" if len(found_intents) == 1: - # Eindeutiger Treffer - final_intent = found_intents[0] - logger.info(f"Intent detected via LLM (Parsed): {final_intent}") - return final_intent + return found_intents[0], "LLM Router (Slow Path)" elif len(found_intents) > 1: - # Mehrere Treffer (z.B. "Es ist FACT oder DECISION") -> Nimm den ersten oder Fallback - logger.warning(f"LLM returned multiple intents {found_intents}. Using first match: {found_intents[0]}") - return found_intents[0] + return found_intents[0], f"LLM Ambiguous {found_intents}" else: - logger.warning(f"LLM did not return a valid strategy name. Falling back to FACT.") + return "FACT", "LLM Fallback (No Match)" - return "FACT" + return "FACT", "Default (No Match)" @router.post("/", response_model=ChatResponse) async def chat_endpoint( @@ -163,9 +153,9 @@ async def chat_endpoint( logger.info(f"Chat request [{query_id}]: {request.message[:50]}...") try: - # 1. Intent Detection - intent = await _classify_intent(request.message, llm) - logger.info(f"[{query_id}] Final Intent: {intent}") + # 1. Intent Detection (mit Source) + intent, intent_source = await _classify_intent(request.message, llm) + logger.info(f"[{query_id}] Final Intent: {intent} via {intent_source}") # Strategy Load strategy = get_decision_strategy(intent) @@ -227,7 +217,8 @@ async def chat_endpoint( answer=answer_text, sources=hits, latency_ms=duration_ms, - intent=intent + intent=intent, + intent_source=intent_source # Source durchreichen ) except Exception as e: diff --git a/docs/appendix.md b/docs/appendix.md index 079740c..4bdcea2 100644 --- a/docs/appendix.md +++ b/docs/appendix.md @@ -71,6 +71,7 @@ Diese sind die Felder, die effektiv in Qdrant gespeichert werden. { "chunk_id": "string (keyword)", // Format: {note_id}#c{index} "note_id": "string (keyword)", // FK zur Note + "type": "string (keyword)", // Typ-Kopie aus Note (Neu in WP06a) "text": "string (text)", // Reintext für Anzeige (ohne Overlap) "window": "string (text)", // Text + Overlap (für Embedding) "ord": "integer", // Laufende Nummer (1..N) diff --git a/docs/mindnet_technical_architecture.md b/docs/mindnet_technical_architecture.md index 3fb41da..b913eac 100644 --- a/docs/mindnet_technical_architecture.md +++ b/docs/mindnet_technical_architecture.md @@ -134,6 +134,7 @@ Die atomaren Sucheinheiten. | :--- | :--- | :--- | | `chunk_id` | Keyword | Deterministisch: `{note_id}#c{index:02d}`. | | `note_id` | Keyword | Referenz zur Note. | + | `type` | Keyword | **Kopie des Note-Typs** (Denormalisiert für Filter). | | `text` | Text | **Reiner Inhalt** (ohne Overlap). Anzeige-Text. | | `window` | Text | **Kontext-Fenster** (mit Overlap). Embedding-Basis. | | `ord` | Integer | Sortierreihenfolge (1..N). | @@ -336,7 +337,7 @@ Damit Qdrant performant bleibt, sind Payload-Indizes essenziell. **Erforderliche Indizes:** * **Notes:** `note_id`, `type`, `tags`. -* **Chunks:** `note_id`, `chunk_id`. +* **Chunks:** `note_id`, `chunk_id`, `type`. * **Edges:** `source_id`, `target_id`, `kind`, `scope`, `note_id`. Validierung erfolgt über `tests/ensure_indexes_and_show.py`. diff --git a/tests/test_wp06_decision.py b/tests/test_wp06_decision.py index 636801f..f9725e7 100644 --- a/tests/test_wp06_decision.py +++ b/tests/test_wp06_decision.py @@ -1,8 +1,10 @@ """ tests/test_wp06_decision.py — Flexibler Integrationstest für WP-06 Update: -- Timeout auf 300s erhöht. -- Robusteres Auslesen der Metadaten (Payload/Source Fix). +- Timeout 300s. +- Zeigt Intent Source an. +- Payload/Source Fallback für Metadaten. +- Debug-Dump bei unknown Type. """ import requests import json @@ -24,7 +26,7 @@ def test_decision_engine(query: str, port: int, expected_intent: str): print(f"FRAGE: '{query}'") print("... warte auf LLM (kann auf CPU >120s dauern) ...") - # FIX: Timeout auf 300 erhöht, passend zur Server-Config + # FIX: Timeout auf 300 erhöht response = requests.post(f"{api_url}/chat/", json=payload, timeout=300) response.raise_for_status() data = response.json() @@ -33,11 +35,14 @@ def test_decision_engine(query: str, port: int, expected_intent: str): # 1. Intent Check intent = data.get("intent", "UNKNOWN") - # Wir normalisieren auf Großbuchstaben für den Vergleich + source_method = data.get("intent_source", "Unknown Source") + match = intent.upper() == expected_intent.upper() print(f"\n1. INTENT DETECTION: [{'✅' if match else '❌'}]") - print(f" Erkannt: {intent} (Erwartet: {expected_intent})") + print(f" Erkannt: {intent}") + print(f" Erwartet: {expected_intent}") + print(f" Methode: {source_method}") # 2. Source Check (Strategic Retrieval) sources = data.get("sources", []) @@ -48,19 +53,25 @@ def test_decision_engine(query: str, port: int, expected_intent: str): if not sources: print(" (Keine Quellen gefunden)") + debug_printed = False + for i, source in enumerate(sources): # --- FIX: Robusterer Zugriff auf Metadaten --- - # Qdrant liefert Daten oft in 'payload', Mindnet DTOs manchmal in 'source' - # Wir prüfen beides, um "Typ: unknown" zu vermeiden. src_meta = source.get("payload") or source.get("source") or {} node_type = src_meta.get("type", "unknown") title = source.get("note_id", "Unknown") score = source.get("total_score", 0.0) + # DEBUG: Wenn Typ unknown ist, dumpen wir das erste Objekt + if node_type == "unknown" and not debug_printed: + print(f"\n 🔴 DEBUG: Raw Data von Quelle {i+1} (da Typ unknown):") + print(json.dumps(source, indent=2, ensure_ascii=False)) + print(" ------------------------------------------------") + debug_printed = True + # Marker für Ausgabe marker = " " - # Liste aller strategischen Typen, die wir besonders hervorheben wollen if node_type in ["value", "principle", "goal", "experience", "belief", "profile", "decision"]: marker = "🎯" # Strategischer Treffer strategic_hits.append(f"{title} ({node_type})")