neue Test
This commit is contained in:
parent
d2270fafdd
commit
bd36d78025
|
|
@ -3,12 +3,12 @@ app/models/dto.py — Pydantic-Modelle (DTOs) für WP-04/WP-05/WP-06
|
|||
|
||||
Zweck:
|
||||
Laufzeit-Modelle für FastAPI (Requests/Responses).
|
||||
WP-06 Update: Intent in ChatResponse.
|
||||
WP-06 Update: Intent & Intent-Source in ChatResponse.
|
||||
|
||||
Version:
|
||||
0.6.0 (WP-06: Decision Engine)
|
||||
0.6.1 (WP-06: Decision Engine Transparency)
|
||||
Stand:
|
||||
2025-12-08
|
||||
2025-12-09
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -123,6 +123,7 @@ class QueryHit(BaseModel):
|
|||
total_score: float
|
||||
paths: Optional[List[List[Dict]]] = None
|
||||
source: Optional[Dict] = None
|
||||
payload: Optional[Dict] = None # Added for flexibility
|
||||
explanation: Optional[Explanation] = None
|
||||
|
||||
|
||||
|
|
@ -151,3 +152,4 @@ class ChatResponse(BaseModel):
|
|||
sources: List[QueryHit] = Field(..., description="Die für die Antwort genutzten Quellen")
|
||||
latency_ms: int
|
||||
intent: Optional[str] = Field("FACT", description="WP-06: Erkannter Intent (FACT/DECISION)")
|
||||
intent_source: Optional[str] = Field("Unknown", description="WP-06: Quelle der Intent-Erkennung (Keyword vs. LLM)")
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
"""
|
||||
app/routers/chat.py — RAG Endpunkt (WP-06 Hybrid Router v2)
|
||||
Update: Robusteres LLM-Parsing für Small Language Models (SLMs).
|
||||
app/routers/chat.py — RAG Endpunkt (WP-06 Hybrid Router v3)
|
||||
Update: Transparenz über Intent-Source (Keyword vs. LLM).
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Depends
|
||||
|
|
@ -87,11 +87,10 @@ def _build_enriched_context(hits: List[QueryHit]) -> str:
|
|||
|
||||
return "\n\n".join(context_parts)
|
||||
|
||||
async def _classify_intent(query: str, llm: LLMService) -> str:
|
||||
async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
|
||||
"""
|
||||
Hybrid Router v2:
|
||||
1. Keyword Check (Best/Longest Match) -> FAST
|
||||
2. LLM Fallback (Robust Parsing) -> SMART
|
||||
Hybrid Router v3:
|
||||
Gibt Tuple zurück: (Intent, Source)
|
||||
"""
|
||||
config = get_full_config()
|
||||
strategies = config.get("strategies", {})
|
||||
|
|
@ -112,8 +111,7 @@ async def _classify_intent(query: str, llm: LLMService) -> str:
|
|||
best_intent = intent_name
|
||||
|
||||
if best_intent:
|
||||
logger.info(f"Intent detected via KEYWORD: {best_intent}")
|
||||
return best_intent
|
||||
return best_intent, "Keyword (Fast Path)"
|
||||
|
||||
# 2. SLOW PATH: LLM Router
|
||||
if settings.get("llm_fallback_enabled", False):
|
||||
|
|
@ -122,35 +120,23 @@ async def _classify_intent(query: str, llm: LLMService) -> str:
|
|||
prompt = router_prompt_template.replace("{query}", query)
|
||||
logger.info("Keywords failed. Asking LLM for Intent...")
|
||||
|
||||
# Kurzer Raw Call
|
||||
raw_response = await llm.generate_raw_response(prompt)
|
||||
|
||||
# --- Robust Parsing für SLMs ---
|
||||
# Wir suchen nach den bekannten Strategie-Namen im Output
|
||||
# Parsing logic
|
||||
llm_output_upper = raw_response.upper()
|
||||
logger.info(f"LLM Router Raw Output: '{raw_response}'") # Debugging
|
||||
|
||||
found_intents = []
|
||||
for strat_key in strategies.keys():
|
||||
# Wir prüfen, ob der Strategie-Name (z.B. "EMPATHY") im Text vorkommt
|
||||
if strat_key in llm_output_upper:
|
||||
found_intents.append(strat_key)
|
||||
|
||||
# Entscheidung
|
||||
final_intent = "FACT"
|
||||
if len(found_intents) == 1:
|
||||
# Eindeutiger Treffer
|
||||
final_intent = found_intents[0]
|
||||
logger.info(f"Intent detected via LLM (Parsed): {final_intent}")
|
||||
return final_intent
|
||||
return found_intents[0], "LLM Router (Slow Path)"
|
||||
elif len(found_intents) > 1:
|
||||
# Mehrere Treffer (z.B. "Es ist FACT oder DECISION") -> Nimm den ersten oder Fallback
|
||||
logger.warning(f"LLM returned multiple intents {found_intents}. Using first match: {found_intents[0]}")
|
||||
return found_intents[0]
|
||||
return found_intents[0], f"LLM Ambiguous {found_intents}"
|
||||
else:
|
||||
logger.warning(f"LLM did not return a valid strategy name. Falling back to FACT.")
|
||||
return "FACT", "LLM Fallback (No Match)"
|
||||
|
||||
return "FACT"
|
||||
return "FACT", "Default (No Match)"
|
||||
|
||||
@router.post("/", response_model=ChatResponse)
|
||||
async def chat_endpoint(
|
||||
|
|
@ -163,9 +149,9 @@ async def chat_endpoint(
|
|||
logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")
|
||||
|
||||
try:
|
||||
# 1. Intent Detection
|
||||
intent = await _classify_intent(request.message, llm)
|
||||
logger.info(f"[{query_id}] Final Intent: {intent}")
|
||||
# 1. Intent Detection (mit Source)
|
||||
intent, intent_source = await _classify_intent(request.message, llm)
|
||||
logger.info(f"[{query_id}] Final Intent: {intent} via {intent_source}")
|
||||
|
||||
# Strategy Load
|
||||
strategy = get_decision_strategy(intent)
|
||||
|
|
@ -227,7 +213,8 @@ async def chat_endpoint(
|
|||
answer=answer_text,
|
||||
sources=hits,
|
||||
latency_ms=duration_ms,
|
||||
intent=intent
|
||||
intent=intent,
|
||||
intent_source=intent_source # NEU: Source durchreichen
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
"""
|
||||
tests/test_wp06_decision.py — Flexibler Integrationstest für WP-06
|
||||
Update:
|
||||
- Timeout auf 300s erhöht.
|
||||
- Robusteres Auslesen der Metadaten (Payload/Source Fix).
|
||||
- Timeout 300s.
|
||||
- Zeigt Intent Source an.
|
||||
- Payload/Source Fallback für Metadaten.
|
||||
- Debug-Dump bei unknown Type.
|
||||
"""
|
||||
import requests
|
||||
import json
|
||||
|
|
@ -24,7 +26,7 @@ def test_decision_engine(query: str, port: int, expected_intent: str):
|
|||
print(f"FRAGE: '{query}'")
|
||||
print("... warte auf LLM (kann auf CPU >120s dauern) ...")
|
||||
|
||||
# FIX: Timeout auf 300 erhöht, passend zur Server-Config
|
||||
# FIX: Timeout auf 300 erhöht
|
||||
response = requests.post(f"{api_url}/chat/", json=payload, timeout=300)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
|
@ -33,11 +35,14 @@ def test_decision_engine(query: str, port: int, expected_intent: str):
|
|||
|
||||
# 1. Intent Check
|
||||
intent = data.get("intent", "UNKNOWN")
|
||||
# Wir normalisieren auf Großbuchstaben für den Vergleich
|
||||
source_method = data.get("intent_source", "Unknown Source")
|
||||
|
||||
match = intent.upper() == expected_intent.upper()
|
||||
|
||||
print(f"\n1. INTENT DETECTION: [{'✅' if match else '❌'}]")
|
||||
print(f" Erkannt: {intent} (Erwartet: {expected_intent})")
|
||||
print(f" Erkannt: {intent}")
|
||||
print(f" Erwartet: {expected_intent}")
|
||||
print(f" Methode: {source_method}")
|
||||
|
||||
# 2. Source Check (Strategic Retrieval)
|
||||
sources = data.get("sources", [])
|
||||
|
|
@ -48,19 +53,25 @@ def test_decision_engine(query: str, port: int, expected_intent: str):
|
|||
if not sources:
|
||||
print(" (Keine Quellen gefunden)")
|
||||
|
||||
debug_printed = False
|
||||
|
||||
for i, source in enumerate(sources):
|
||||
# --- FIX: Robusterer Zugriff auf Metadaten ---
|
||||
# Qdrant liefert Daten oft in 'payload', Mindnet DTOs manchmal in 'source'
|
||||
# Wir prüfen beides, um "Typ: unknown" zu vermeiden.
|
||||
src_meta = source.get("payload") or source.get("source") or {}
|
||||
|
||||
node_type = src_meta.get("type", "unknown")
|
||||
title = source.get("note_id", "Unknown")
|
||||
score = source.get("total_score", 0.0)
|
||||
|
||||
# DEBUG: Wenn Typ unknown ist, dumpen wir das erste Objekt
|
||||
if node_type == "unknown" and not debug_printed:
|
||||
print(f"\n 🔴 DEBUG: Raw Data von Quelle {i+1} (da Typ unknown):")
|
||||
print(json.dumps(source, indent=2, ensure_ascii=False))
|
||||
print(" ------------------------------------------------")
|
||||
debug_printed = True
|
||||
|
||||
# Marker für Ausgabe
|
||||
marker = " "
|
||||
# Liste aller strategischen Typen, die wir besonders hervorheben wollen
|
||||
if node_type in ["value", "principle", "goal", "experience", "belief", "profile", "decision"]:
|
||||
marker = "🎯" # Strategischer Treffer
|
||||
strategic_hits.append(f"{title} ({node_type})")
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user