WP06a #5
|
|
@ -2,7 +2,7 @@
|
||||||
app/core/retriever.py — Hybrider Such-Algorithmus
|
app/core/retriever.py — Hybrider Such-Algorithmus
|
||||||
|
|
||||||
Version:
|
Version:
|
||||||
0.5.2 (WP-05 Fix: Pass content in QueryHit source)
|
0.5.3 (WP-06 Fix: Populate 'payload' in QueryHit for meta-data access)
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
@ -127,7 +127,9 @@ def _build_explanation(
|
||||||
node_key: Optional[str]
|
node_key: Optional[str]
|
||||||
) -> Explanation:
|
) -> Explanation:
|
||||||
"""Erstellt ein Explanation-Objekt."""
|
"""Erstellt ein Explanation-Objekt."""
|
||||||
sem_w, edge_w, cent_w = _get_scoring_weights()
|
sem_w, _edge_w, _cent_w = _get_scoring_weights()
|
||||||
|
# Scoring weights erneut laden für Reason-Details
|
||||||
|
_, edge_w_cfg, cent_w_cfg = _get_scoring_weights()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
type_weight = float(payload.get("retriever_weight", 1.0))
|
type_weight = float(payload.get("retriever_weight", 1.0))
|
||||||
|
|
@ -138,8 +140,8 @@ def _build_explanation(
|
||||||
|
|
||||||
breakdown = ScoreBreakdown(
|
breakdown = ScoreBreakdown(
|
||||||
semantic_contribution=(sem_w * semantic_score * type_weight),
|
semantic_contribution=(sem_w * semantic_score * type_weight),
|
||||||
edge_contribution=(edge_w * edge_bonus),
|
edge_contribution=(edge_w_cfg * edge_bonus),
|
||||||
centrality_contribution=(cent_w * cent_bonus),
|
centrality_contribution=(cent_w_cfg * cent_bonus),
|
||||||
raw_semantic=semantic_score,
|
raw_semantic=semantic_score,
|
||||||
raw_edge_bonus=edge_bonus,
|
raw_edge_bonus=edge_bonus,
|
||||||
raw_centrality=cent_bonus,
|
raw_centrality=cent_bonus,
|
||||||
|
|
@ -179,7 +181,7 @@ def _build_explanation(
|
||||||
|
|
||||||
all_edges = sorted(edges_dto, key=lambda e: e.weight, reverse=True)
|
all_edges = sorted(edges_dto, key=lambda e: e.weight, reverse=True)
|
||||||
for top_edge in all_edges[:3]:
|
for top_edge in all_edges[:3]:
|
||||||
impact = edge_w * top_edge.weight
|
impact = edge_w_cfg * top_edge.weight
|
||||||
dir_txt = "Verweist auf" if top_edge.direction == "out" else "Referenziert von"
|
dir_txt = "Verweist auf" if top_edge.direction == "out" else "Referenziert von"
|
||||||
tgt_txt = top_edge.target if top_edge.direction == "out" else top_edge.source
|
tgt_txt = top_edge.target if top_edge.direction == "out" else top_edge.source
|
||||||
reasons.append(Reason(kind="edge", message=f"{dir_txt} '{tgt_txt}' via '{top_edge.kind}'", score_impact=impact, details={"kind": top_edge.kind}))
|
reasons.append(Reason(kind="edge", message=f"{dir_txt} '{tgt_txt}' via '{top_edge.kind}'", score_impact=impact, details={"kind": top_edge.kind}))
|
||||||
|
|
@ -261,7 +263,6 @@ def _build_hits_from_semantic(
|
||||||
node_key=payload.get("chunk_id") or payload.get("note_id")
|
node_key=payload.get("chunk_id") or payload.get("note_id")
|
||||||
)
|
)
|
||||||
|
|
||||||
# FIX: Hier holen wir jetzt den Textinhalt (text, content oder page_content) aus dem Payload
|
|
||||||
text_content = payload.get("page_content") or payload.get("text") or payload.get("content")
|
text_content = payload.get("page_content") or payload.get("text") or payload.get("content")
|
||||||
|
|
||||||
results.append(QueryHit(
|
results.append(QueryHit(
|
||||||
|
|
@ -275,8 +276,10 @@ def _build_hits_from_semantic(
|
||||||
source={
|
source={
|
||||||
"path": payload.get("path"),
|
"path": payload.get("path"),
|
||||||
"section": payload.get("section") or payload.get("section_title"),
|
"section": payload.get("section") or payload.get("section_title"),
|
||||||
"text": text_content # WICHTIG: Inhalt durchreichen
|
"text": text_content
|
||||||
},
|
},
|
||||||
|
# --- FIX: Wir füllen das payload-Feld explizit ---
|
||||||
|
payload=payload,
|
||||||
explanation=explanation_obj
|
explanation=explanation_obj
|
||||||
))
|
))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,12 +3,12 @@ app/models/dto.py — Pydantic-Modelle (DTOs) für WP-04/WP-05/WP-06
|
||||||
|
|
||||||
Zweck:
|
Zweck:
|
||||||
Laufzeit-Modelle für FastAPI (Requests/Responses).
|
Laufzeit-Modelle für FastAPI (Requests/Responses).
|
||||||
WP-06 Update: Intent in ChatResponse.
|
WP-06 Update: Intent & Intent-Source in ChatResponse.
|
||||||
|
|
||||||
Version:
|
Version:
|
||||||
0.6.0 (WP-06: Decision Engine)
|
0.6.1 (WP-06: Decision Engine Transparency)
|
||||||
Stand:
|
Stand:
|
||||||
2025-12-08
|
2025-12-09
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@ -123,6 +123,7 @@ class QueryHit(BaseModel):
|
||||||
total_score: float
|
total_score: float
|
||||||
paths: Optional[List[List[Dict]]] = None
|
paths: Optional[List[List[Dict]]] = None
|
||||||
source: Optional[Dict] = None
|
source: Optional[Dict] = None
|
||||||
|
payload: Optional[Dict] = None # Added for flexibility & WP-06 meta-data
|
||||||
explanation: Optional[Explanation] = None
|
explanation: Optional[Explanation] = None
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -150,4 +151,5 @@ class ChatResponse(BaseModel):
|
||||||
answer: str = Field(..., description="Generierte Antwort vom LLM")
|
answer: str = Field(..., description="Generierte Antwort vom LLM")
|
||||||
sources: List[QueryHit] = Field(..., description="Die für die Antwort genutzten Quellen")
|
sources: List[QueryHit] = Field(..., description="Die für die Antwort genutzten Quellen")
|
||||||
latency_ms: int
|
latency_ms: int
|
||||||
intent: Optional[str] = Field("FACT", description="WP-06: Erkannter Intent (FACT/DECISION)")
|
intent: Optional[str] = Field("FACT", description="WP-06: Erkannter Intent (FACT/DECISION)")
|
||||||
|
intent_source: Optional[str] = Field("Unknown", description="WP-06: Quelle der Intent-Erkennung (Keyword vs. LLM)")
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
"""
|
"""
|
||||||
app/routers/chat.py — RAG Endpunkt (WP-06 Hybrid Router v2)
|
app/routers/chat.py — RAG Endpunkt (WP-06 Hybrid Router v3)
|
||||||
Update: Robusteres LLM-Parsing für Small Language Models (SLMs).
|
Update: Transparenz über Intent-Source (Keyword vs. LLM).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException, Depends
|
from fastapi import APIRouter, HTTPException, Depends
|
||||||
|
|
@ -76,7 +76,11 @@ def _build_enriched_context(hits: List[QueryHit]) -> str:
|
||||||
"[Kein Text]"
|
"[Kein Text]"
|
||||||
)
|
)
|
||||||
title = hit.note_id or "Unbekannt"
|
title = hit.note_id or "Unbekannt"
|
||||||
note_type = source.get("type", "unknown").upper()
|
|
||||||
|
# FIX: Wir holen den Typ aus Payload oder Source (Fallback)
|
||||||
|
payload = hit.payload or {}
|
||||||
|
note_type = payload.get("type") or source.get("type", "unknown")
|
||||||
|
note_type = str(note_type).upper()
|
||||||
|
|
||||||
entry = (
|
entry = (
|
||||||
f"### QUELLE {i}: {title}\n"
|
f"### QUELLE {i}: {title}\n"
|
||||||
|
|
@ -87,11 +91,10 @@ def _build_enriched_context(hits: List[QueryHit]) -> str:
|
||||||
|
|
||||||
return "\n\n".join(context_parts)
|
return "\n\n".join(context_parts)
|
||||||
|
|
||||||
async def _classify_intent(query: str, llm: LLMService) -> str:
|
async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
|
||||||
"""
|
"""
|
||||||
Hybrid Router v2:
|
Hybrid Router v3:
|
||||||
1. Keyword Check (Best/Longest Match) -> FAST
|
Gibt Tuple zurück: (Intent, Source)
|
||||||
2. LLM Fallback (Robust Parsing) -> SMART
|
|
||||||
"""
|
"""
|
||||||
config = get_full_config()
|
config = get_full_config()
|
||||||
strategies = config.get("strategies", {})
|
strategies = config.get("strategies", {})
|
||||||
|
|
@ -112,8 +115,7 @@ async def _classify_intent(query: str, llm: LLMService) -> str:
|
||||||
best_intent = intent_name
|
best_intent = intent_name
|
||||||
|
|
||||||
if best_intent:
|
if best_intent:
|
||||||
logger.info(f"Intent detected via KEYWORD: {best_intent}")
|
return best_intent, "Keyword (Fast Path)"
|
||||||
return best_intent
|
|
||||||
|
|
||||||
# 2. SLOW PATH: LLM Router
|
# 2. SLOW PATH: LLM Router
|
||||||
if settings.get("llm_fallback_enabled", False):
|
if settings.get("llm_fallback_enabled", False):
|
||||||
|
|
@ -122,35 +124,23 @@ async def _classify_intent(query: str, llm: LLMService) -> str:
|
||||||
prompt = router_prompt_template.replace("{query}", query)
|
prompt = router_prompt_template.replace("{query}", query)
|
||||||
logger.info("Keywords failed. Asking LLM for Intent...")
|
logger.info("Keywords failed. Asking LLM for Intent...")
|
||||||
|
|
||||||
# Kurzer Raw Call
|
|
||||||
raw_response = await llm.generate_raw_response(prompt)
|
raw_response = await llm.generate_raw_response(prompt)
|
||||||
|
|
||||||
# --- Robust Parsing für SLMs ---
|
# Parsing logic
|
||||||
# Wir suchen nach den bekannten Strategie-Namen im Output
|
|
||||||
llm_output_upper = raw_response.upper()
|
llm_output_upper = raw_response.upper()
|
||||||
logger.info(f"LLM Router Raw Output: '{raw_response}'") # Debugging
|
|
||||||
|
|
||||||
found_intents = []
|
found_intents = []
|
||||||
for strat_key in strategies.keys():
|
for strat_key in strategies.keys():
|
||||||
# Wir prüfen, ob der Strategie-Name (z.B. "EMPATHY") im Text vorkommt
|
|
||||||
if strat_key in llm_output_upper:
|
if strat_key in llm_output_upper:
|
||||||
found_intents.append(strat_key)
|
found_intents.append(strat_key)
|
||||||
|
|
||||||
# Entscheidung
|
|
||||||
final_intent = "FACT"
|
|
||||||
if len(found_intents) == 1:
|
if len(found_intents) == 1:
|
||||||
# Eindeutiger Treffer
|
return found_intents[0], "LLM Router (Slow Path)"
|
||||||
final_intent = found_intents[0]
|
|
||||||
logger.info(f"Intent detected via LLM (Parsed): {final_intent}")
|
|
||||||
return final_intent
|
|
||||||
elif len(found_intents) > 1:
|
elif len(found_intents) > 1:
|
||||||
# Mehrere Treffer (z.B. "Es ist FACT oder DECISION") -> Nimm den ersten oder Fallback
|
return found_intents[0], f"LLM Ambiguous {found_intents}"
|
||||||
logger.warning(f"LLM returned multiple intents {found_intents}. Using first match: {found_intents[0]}")
|
|
||||||
return found_intents[0]
|
|
||||||
else:
|
else:
|
||||||
logger.warning(f"LLM did not return a valid strategy name. Falling back to FACT.")
|
return "FACT", "LLM Fallback (No Match)"
|
||||||
|
|
||||||
return "FACT"
|
return "FACT", "Default (No Match)"
|
||||||
|
|
||||||
@router.post("/", response_model=ChatResponse)
|
@router.post("/", response_model=ChatResponse)
|
||||||
async def chat_endpoint(
|
async def chat_endpoint(
|
||||||
|
|
@ -163,9 +153,9 @@ async def chat_endpoint(
|
||||||
logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")
|
logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 1. Intent Detection
|
# 1. Intent Detection (mit Source)
|
||||||
intent = await _classify_intent(request.message, llm)
|
intent, intent_source = await _classify_intent(request.message, llm)
|
||||||
logger.info(f"[{query_id}] Final Intent: {intent}")
|
logger.info(f"[{query_id}] Final Intent: {intent} via {intent_source}")
|
||||||
|
|
||||||
# Strategy Load
|
# Strategy Load
|
||||||
strategy = get_decision_strategy(intent)
|
strategy = get_decision_strategy(intent)
|
||||||
|
|
@ -227,7 +217,8 @@ async def chat_endpoint(
|
||||||
answer=answer_text,
|
answer=answer_text,
|
||||||
sources=hits,
|
sources=hits,
|
||||||
latency_ms=duration_ms,
|
latency_ms=duration_ms,
|
||||||
intent=intent
|
intent=intent,
|
||||||
|
intent_source=intent_source # Source durchreichen
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -71,6 +71,7 @@ Diese sind die Felder, die effektiv in Qdrant gespeichert werden.
|
||||||
{
|
{
|
||||||
"chunk_id": "string (keyword)", // Format: {note_id}#c{index}
|
"chunk_id": "string (keyword)", // Format: {note_id}#c{index}
|
||||||
"note_id": "string (keyword)", // FK zur Note
|
"note_id": "string (keyword)", // FK zur Note
|
||||||
|
"type": "string (keyword)", // Typ-Kopie aus Note (Neu in WP06a)
|
||||||
"text": "string (text)", // Reintext für Anzeige (ohne Overlap)
|
"text": "string (text)", // Reintext für Anzeige (ohne Overlap)
|
||||||
"window": "string (text)", // Text + Overlap (für Embedding)
|
"window": "string (text)", // Text + Overlap (für Embedding)
|
||||||
"ord": "integer", // Laufende Nummer (1..N)
|
"ord": "integer", // Laufende Nummer (1..N)
|
||||||
|
|
|
||||||
|
|
@ -134,6 +134,7 @@ Die atomaren Sucheinheiten.
|
||||||
| :--- | :--- | :--- |
|
| :--- | :--- | :--- |
|
||||||
| `chunk_id` | Keyword | Deterministisch: `{note_id}#c{index:02d}`. |
|
| `chunk_id` | Keyword | Deterministisch: `{note_id}#c{index:02d}`. |
|
||||||
| `note_id` | Keyword | Referenz zur Note. |
|
| `note_id` | Keyword | Referenz zur Note. |
|
||||||
|
| `type` | Keyword | **Kopie des Note-Typs** (Denormalisiert für Filter). |
|
||||||
| `text` | Text | **Reiner Inhalt** (ohne Overlap). Anzeige-Text. |
|
| `text` | Text | **Reiner Inhalt** (ohne Overlap). Anzeige-Text. |
|
||||||
| `window` | Text | **Kontext-Fenster** (mit Overlap). Embedding-Basis. |
|
| `window` | Text | **Kontext-Fenster** (mit Overlap). Embedding-Basis. |
|
||||||
| `ord` | Integer | Sortierreihenfolge (1..N). |
|
| `ord` | Integer | Sortierreihenfolge (1..N). |
|
||||||
|
|
@ -336,7 +337,7 @@ Damit Qdrant performant bleibt, sind Payload-Indizes essenziell.
|
||||||
|
|
||||||
**Erforderliche Indizes:**
|
**Erforderliche Indizes:**
|
||||||
* **Notes:** `note_id`, `type`, `tags`.
|
* **Notes:** `note_id`, `type`, `tags`.
|
||||||
* **Chunks:** `note_id`, `chunk_id`.
|
* **Chunks:** `note_id`, `chunk_id`, `type`.
|
||||||
* **Edges:** `source_id`, `target_id`, `kind`, `scope`, `note_id`.
|
* **Edges:** `source_id`, `target_id`, `kind`, `scope`, `note_id`.
|
||||||
|
|
||||||
Validierung erfolgt über `tests/ensure_indexes_and_show.py`.
|
Validierung erfolgt über `tests/ensure_indexes_and_show.py`.
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,10 @@
|
||||||
"""
|
"""
|
||||||
tests/test_wp06_decision.py — Flexibler Integrationstest für WP-06
|
tests/test_wp06_decision.py — Flexibler Integrationstest für WP-06
|
||||||
Update:
|
Update:
|
||||||
- Timeout auf 300s erhöht.
|
- Timeout 300s.
|
||||||
- Robusteres Auslesen der Metadaten (Payload/Source Fix).
|
- Zeigt Intent Source an.
|
||||||
|
- Payload/Source Fallback für Metadaten.
|
||||||
|
- Debug-Dump bei unknown Type.
|
||||||
"""
|
"""
|
||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
|
|
@ -24,7 +26,7 @@ def test_decision_engine(query: str, port: int, expected_intent: str):
|
||||||
print(f"FRAGE: '{query}'")
|
print(f"FRAGE: '{query}'")
|
||||||
print("... warte auf LLM (kann auf CPU >120s dauern) ...")
|
print("... warte auf LLM (kann auf CPU >120s dauern) ...")
|
||||||
|
|
||||||
# FIX: Timeout auf 300 erhöht, passend zur Server-Config
|
# FIX: Timeout auf 300 erhöht
|
||||||
response = requests.post(f"{api_url}/chat/", json=payload, timeout=300)
|
response = requests.post(f"{api_url}/chat/", json=payload, timeout=300)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
data = response.json()
|
||||||
|
|
@ -33,11 +35,14 @@ def test_decision_engine(query: str, port: int, expected_intent: str):
|
||||||
|
|
||||||
# 1. Intent Check
|
# 1. Intent Check
|
||||||
intent = data.get("intent", "UNKNOWN")
|
intent = data.get("intent", "UNKNOWN")
|
||||||
# Wir normalisieren auf Großbuchstaben für den Vergleich
|
source_method = data.get("intent_source", "Unknown Source")
|
||||||
|
|
||||||
match = intent.upper() == expected_intent.upper()
|
match = intent.upper() == expected_intent.upper()
|
||||||
|
|
||||||
print(f"\n1. INTENT DETECTION: [{'✅' if match else '❌'}]")
|
print(f"\n1. INTENT DETECTION: [{'✅' if match else '❌'}]")
|
||||||
print(f" Erkannt: {intent} (Erwartet: {expected_intent})")
|
print(f" Erkannt: {intent}")
|
||||||
|
print(f" Erwartet: {expected_intent}")
|
||||||
|
print(f" Methode: {source_method}")
|
||||||
|
|
||||||
# 2. Source Check (Strategic Retrieval)
|
# 2. Source Check (Strategic Retrieval)
|
||||||
sources = data.get("sources", [])
|
sources = data.get("sources", [])
|
||||||
|
|
@ -48,19 +53,25 @@ def test_decision_engine(query: str, port: int, expected_intent: str):
|
||||||
if not sources:
|
if not sources:
|
||||||
print(" (Keine Quellen gefunden)")
|
print(" (Keine Quellen gefunden)")
|
||||||
|
|
||||||
|
debug_printed = False
|
||||||
|
|
||||||
for i, source in enumerate(sources):
|
for i, source in enumerate(sources):
|
||||||
# --- FIX: Robusterer Zugriff auf Metadaten ---
|
# --- FIX: Robusterer Zugriff auf Metadaten ---
|
||||||
# Qdrant liefert Daten oft in 'payload', Mindnet DTOs manchmal in 'source'
|
|
||||||
# Wir prüfen beides, um "Typ: unknown" zu vermeiden.
|
|
||||||
src_meta = source.get("payload") or source.get("source") or {}
|
src_meta = source.get("payload") or source.get("source") or {}
|
||||||
|
|
||||||
node_type = src_meta.get("type", "unknown")
|
node_type = src_meta.get("type", "unknown")
|
||||||
title = source.get("note_id", "Unknown")
|
title = source.get("note_id", "Unknown")
|
||||||
score = source.get("total_score", 0.0)
|
score = source.get("total_score", 0.0)
|
||||||
|
|
||||||
|
# DEBUG: Wenn Typ unknown ist, dumpen wir das erste Objekt
|
||||||
|
if node_type == "unknown" and not debug_printed:
|
||||||
|
print(f"\n 🔴 DEBUG: Raw Data von Quelle {i+1} (da Typ unknown):")
|
||||||
|
print(json.dumps(source, indent=2, ensure_ascii=False))
|
||||||
|
print(" ------------------------------------------------")
|
||||||
|
debug_printed = True
|
||||||
|
|
||||||
# Marker für Ausgabe
|
# Marker für Ausgabe
|
||||||
marker = " "
|
marker = " "
|
||||||
# Liste aller strategischen Typen, die wir besonders hervorheben wollen
|
|
||||||
if node_type in ["value", "principle", "goal", "experience", "belief", "profile", "decision"]:
|
if node_type in ["value", "principle", "goal", "experience", "belief", "profile", "decision"]:
|
||||||
marker = "🎯" # Strategischer Treffer
|
marker = "🎯" # Strategischer Treffer
|
||||||
strategic_hits.append(f"{title} ({node_type})")
|
strategic_hits.append(f"{title} ({node_type})")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user