236 lines
7.7 KiB
Python
236 lines
7.7 KiB
Python
"""
app/routers/chat.py — RAG endpoint (WP-06 Decision Engine - Full Config Refactor)

Purpose:
Connects retrieval with LLM generation.
WP-06: Implements intent detection and strategic retrieval.
Update: Configuration via decision_engine.yaml (late binding) with 'best match' logic.
"""
|
|
|
|
from fastapi import APIRouter, HTTPException, Depends
|
|
from typing import List, Dict, Any
|
|
import time
|
|
import uuid
|
|
import logging
|
|
import yaml
|
|
from pathlib import Path
|
|
|
|
from app.config import get_settings
|
|
from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
|
|
from app.services.llm_service import LLMService
|
|
from app.core.retriever import Retriever
|
|
|
|
# Router instance on which this module's chat endpoint is registered via @router.post.
router = APIRouter()
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
# --- Helper: Config Loader ---
|
|
|
|
# Cache for the config (so the file is not re-read on every request)
|
|
# None until get_full_config() performs the first load; afterwards the loaded config.
_DECISION_CONFIG_CACHE = None
|
|
|
|
def _load_decision_config() -> Dict[str, Any]:
    """Load the decision-engine configuration from YAML (late binding).

    The file location is taken from ``get_settings().DECISION_CONFIG_PATH``.
    A built-in default configuration is returned instead when the file

    * does not exist,
    * cannot be read or parsed, or
    * does not contain a mapping at the top level (for example an empty
      file, which ``yaml.safe_load`` parses to ``None``).

    Returns:
        The parsed configuration mapping, or the built-in default.
    """
    settings = get_settings()
    path = Path(settings.DECISION_CONFIG_PATH)

    # Default fallback in case the YAML file is broken or missing.
    default_config = {
        "strategies": {
            "FACT": {"trigger_keywords": []},
            "DECISION": {
                "trigger_keywords": ["soll ich", "meinung"],
                "inject_types": ["value", "principle"],
                "prompt_template": "decision_template",
            },
        }
    }

    if not path.exists():
        logger.warning(f"Decision config not found at {path}, using defaults.")
        return default_config

    try:
        with open(path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
    except Exception as e:
        # Deliberately broad: any read/parse problem must degrade to the
        # defaults rather than take the endpoint down.
        logger.error(f"Failed to load decision config: {e}")
        return default_config

    # An empty or scalar/list YAML document parses successfully but is not a
    # usable config; callers rely on dict methods such as ``.get``.
    if not isinstance(loaded, dict):
        logger.error(f"Decision config at {path} is not a mapping, using defaults.")
        return default_config

    return loaded
|
|
|
|
def get_full_config() -> Dict[str, Any]:
    """Return the complete decision config (used for intent detection).

    The config is loaded through ``_load_decision_config`` the first time it
    is needed and kept in the module-level ``_DECISION_CONFIG_CACHE``; later
    calls return the cached object without touching the file again.
    """
    global _DECISION_CONFIG_CACHE

    cached = _DECISION_CONFIG_CACHE
    if cached is not None:
        return cached

    # First use (or nothing cached yet): load and remember the result.
    _DECISION_CONFIG_CACHE = _load_decision_config()
    return _DECISION_CONFIG_CACHE
|
|
|
|
def get_decision_strategy(intent: str) -> Dict[str, Any]:
    """Return the strategy configured for a specific intent.

    Lookup order: the strategy stored under ``intent``, then the ``"FACT"``
    strategy, then an empty dict. Entries whose YAML value is null (for
    example a bare ``FACT:`` key) are treated like missing entries, so the
    function never hands ``None`` to callers that go on to call ``.get``.

    Args:
        intent: Name of the detected intent, i.e. a key under ``strategies``.

    Returns:
        The matching strategy mapping, or ``{}`` if none is configured.
    """
    config = get_full_config() or {}
    # ``strategies:`` with no value parses to None; normalise to a dict.
    strategies = config.get("strategies") or {}

    # Fall back to FACT when the intent is unknown (or configured as null).
    for key in (intent, "FACT"):
        strategy = strategies.get(key)
        if strategy is not None:
            return strategy
    return {}
|
|
|
|
|
|
# --- Dependencies ---
|
|
|
|
def get_llm_service():
    """Dependency provider: construct and return a new ``LLMService``."""
    service = LLMService()
    return service
|
|
|
|
def get_retriever():
    """Dependency provider: construct and return a new ``Retriever``."""
    retriever = Retriever()
    return retriever
|
|
|
|
|
|
# --- Logic ---
|
|
|
|
def _build_enriched_context(hits: List[QueryHit]) -> str:
    """Build a 'rich context' string from retrieval hits.

    Instead of passing only raw text, each hit is rendered together with
    metadata (note id, type, score) so that the LLM can understand the
    semantic role of each snippet.

    Args:
        hits: Retrieval hits. Each hit is read via ``hit.source`` (a mapping
            or ``None``), ``hit.note_id`` and ``hit.total_score``.

    Returns:
        One formatted entry per hit, joined by blank lines; an empty string
        when ``hits`` is empty.
    """
    # Payload keys that may carry the snippet text, in order of preference.
    content_keys = ("text", "content", "page_content", "chunk_text")
    context_parts = []

    for i, hit in enumerate(hits, 1):
        source = hit.source or {}

        # 1. Extract the content: first non-empty candidate wins.
        content = next(
            (source[key] for key in content_keys if source.get(key)),
            "[Kein Textinhalt verfügbar]",
        )

        # 2. Metadata for "context intelligence".
        title = hit.note_id or "Unbekannte Notiz"
        # The type may be absent, None or not a string in the payload;
        # normalise before upper-casing so one bad hit cannot raise.
        note_type = str(source.get("type") or "unknown").upper()

        # 3. Formatting.
        entry = (
            f"### QUELLE {i}: {title}\n"
            f"TYP: [{note_type}] (Score: {hit.total_score:.2f})\n"
            f"INHALT:\n{content}\n"
        )
        context_parts.append(entry)

    return "\n\n".join(context_parts)
|
|
|
|
async def _classify_intent(query: str, llm: LLMService) -> str:
    """WP-06 intent detection (best match / longest keyword wins).

    Checks the ``trigger_keywords`` of every configured strategy against the
    lower-cased query using substring matching. If several strategies match,
    the one with the longest matching keyword wins (specificity). The
    ``"FACT"`` strategy is never matched by keyword; it is the default.

    Null strategies, null keyword lists and non-string keywords in the YAML
    are skipped instead of raising.

    Args:
        query: The user's message.
        llm: Accepted for interface compatibility; not used by this
            keyword-based implementation.

    Returns:
        The name of the best-matching intent, or ``"FACT"`` if nothing
        matches.
    """
    config = get_full_config() or {}
    strategies = config.get("strategies") or {}

    query_lower = query.lower()

    best_intent = "FACT"
    max_match_length = 0

    for intent_name, strategy in strategies.items():
        if intent_name == "FACT":
            continue

        # A bare ``INTENT:`` or ``trigger_keywords:`` key parses to None.
        keywords = (strategy or {}).get("trigger_keywords") or []

        for keyword in keywords:
            if not isinstance(keyword, str):
                # e.g. an unquoted number or null in the YAML list
                continue
            # Only a strictly longer match can replace the current favourite.
            # There is deliberately no early exit: later keywords may be
            # even longer.
            if len(keyword) > max_match_length and keyword.lower() in query_lower:
                max_match_length = len(keyword)
                best_intent = intent_name

    return best_intent
|
|
|
|
@router.post("/", response_model=ChatResponse)
async def chat_endpoint(
    request: ChatRequest,
    llm: LLMService = Depends(get_llm_service),
    retriever: Retriever = Depends(get_retriever)
):
    """Answer a chat message using retrieval-augmented generation.

    Steps:
      1. Detect the intent of ``request.message`` and look up its strategy.
      2. Primary retrieval: hybrid search with the caller's ``top_k``.
      3. Strategic retrieval: if the strategy lists ``inject_types``, run a
         second hybrid search (``top_k=3``) filtered to those types and merge
         hits whose ``node_id`` is not already present.
      4. Build the enriched context string from all hits.
      5. Generate the answer through ``llm.generate_rag_response``.
      6. Return the answer, the hits, the latency and the detected intent.

    Raises:
        HTTPException: An ``HTTPException`` raised by a collaborator is
            passed through unchanged; any other exception is logged and
            reported as status 500 with the exception text as detail.
    """
    # Monotonic clock: the measured latency cannot be distorted by
    # wall-clock adjustments while the request is in flight.
    start_time = time.perf_counter()
    query_id = str(uuid.uuid4())

    logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")

    try:
        # 1. Intent detection (config-driven, best match)
        intent = await _classify_intent(request.message, llm)
        logger.info(f"[{query_id}] Detected Intent: {intent}")

        # Load the strategy from the config (late binding)
        strategy = get_decision_strategy(intent)
        inject_types = strategy.get("inject_types", [])
        prompt_key = strategy.get("prompt_template", "rag_template")
        prepend_instr = strategy.get("prepend_instruction", "")

        # 2. Primary retrieval (facts)
        query_req = QueryRequest(
            query=request.message,
            mode="hybrid",
            top_k=request.top_k,
            explain=request.explain
        )
        retrieve_result = await retriever.search(query_req)
        hits = retrieve_result.results

        # 3. Strategic retrieval (configurable)
        if inject_types:
            logger.info(f"[{query_id}] Executing Strategic Retrieval for types: {inject_types}...")
            strategy_req = QueryRequest(
                query=request.message,
                mode="hybrid",
                top_k=3,
                filters={"type": inject_types},  # dynamic list from the YAML
                explain=False
            )
            strategy_result = await retriever.search(strategy_req)

            # Merge results, de-duplicating via node_id. The set is updated
            # as hits are appended so that a node_id repeated within the
            # strategic results is also added only once.
            seen_ids = {h.node_id for h in hits}
            for strat_hit in strategy_result.results:
                if strat_hit.node_id not in seen_ids:
                    seen_ids.add(strat_hit.node_id)
                    hits.append(strat_hit)

        # 4. Context building
        if not hits:
            context_str = "Keine relevanten Notizen gefunden."
        else:
            context_str = _build_enriched_context(hits)

        # 5. Generation setup
        # NOTE(review): the template is looked up here but is not passed to
        # generate_rag_response below, so the strategy's prompt_template only
        # shows up in the log line. Confirm against LLMService whether it
        # should be forwarded.
        template = llm.prompts.get(prompt_key, "{context_str}\n\n{query}")

        # Inject the strategy's instruction in front of the context
        # (if configured).
        if prepend_instr:
            context_str = f"{prepend_instr}\n\n{context_str}"

        logger.info(f"[{query_id}] Sending to LLM (Intent: {intent}, Template: {prompt_key})...")
        answer_text = await llm.generate_rag_response(
            query=request.message,
            context_str=context_str
        )

        # 6. Response
        duration_ms = int((time.perf_counter() - start_time) * 1000)

        return ChatResponse(
            query_id=query_id,
            answer=answer_text,
            sources=hits,
            latency_ms=duration_ms,
            intent=intent
        )

    except HTTPException:
        # Preserve deliberate HTTP errors (status code and detail) instead
        # of rewrapping them as a generic 500.
        raise
    except Exception as e:
        logger.error(f"Error in chat endpoint: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e