mindnet/app/routers/chat.py

199 lines
6.4 KiB
Python

"""
app/routers/chat.py — RAG Endpunkt (WP-06 Decision Engine - Late Binding Refactor)
Zweck:
Verbindet Retrieval mit LLM-Generation.
WP-06: Implementiert Intent Detection und Strategic Retrieval.
Update: Konfiguration via decision_engine.yaml (Late Binding).
"""
from fastapi import APIRouter, HTTPException, Depends
from typing import List, Dict, Any
import time
import uuid
import logging
import yaml
from pathlib import Path
from app.config import get_settings
from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
from app.services.llm_service import LLMService
from app.core.retriever import Retriever
# Router object for this module; the chat route below is registered on it.
router = APIRouter()
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# --- Helper: Config Loader ---
def _load_decision_config() -> Dict[str, Any]:
    """Load the decision-engine configuration (late binding).

    Reads the YAML file located at ``settings.DECISION_CONFIG_PATH``.
    The built-in default strategies are returned instead when the file is
    missing, cannot be read or parsed, or does not hold a mapping at its
    top level (for example an empty file).

    Returns:
        The parsed configuration mapping, or the built-in defaults.
    """
    settings = get_settings()
    path = Path(settings.DECISION_CONFIG_PATH)
    default_config = {
        "strategies": {
            "FACT": {"inject_types": [], "prompt_template": "rag_template"},
            "DECISION": {"inject_types": ["value", "principle"], "prompt_template": "decision_template"},
        }
    }

    if not path.exists():
        logger.warning(f"Decision config not found at {path}, using defaults.")
        return default_config

    try:
        with open(path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
    except Exception as e:
        # Deliberately broad: a broken config file must never take the chat
        # endpoint down, so any read/parse failure falls back to defaults.
        logger.error(f"Failed to load decision config: {e}")
        return default_config

    # safe_load yields None for an empty document and may yield a list or
    # scalar for a malformed one; callers rely on getting a mapping.
    if not isinstance(loaded, dict):
        logger.warning(f"Decision config at {path} is not a mapping, using defaults.")
        return default_config
    return loaded
# Cache für die Config (damit wir nicht bei jedem Request lesen)
# Holds the loaded configuration after the first lookup; None means
# "not loaded yet".
_DECISION_CONFIG_CACHE = None


def get_decision_strategy(intent: str) -> Dict[str, Any]:
    """Return the strategy settings configured for *intent*.

    The configuration is loaded on first use and then served from the
    module-level cache. Unknown intents fall back to the ``FACT`` strategy.
    An empty dict is returned when no usable strategy exists, so callers
    can always call ``.get`` on the result.

    Args:
        intent: Intent label to look up, e.g. ``"FACT"`` or ``"DECISION"``.

    Returns:
        The strategy mapping for the intent, the ``FACT`` mapping as a
        fallback, or ``{}``.
    """
    global _DECISION_CONFIG_CACHE
    if _DECISION_CONFIG_CACHE is None:
        loaded = _load_decision_config()
        # Store a dict even for unusable content so the file is not re-read
        # on every request.
        _DECISION_CONFIG_CACHE = loaded if isinstance(loaded, dict) else {}

    config = _DECISION_CONFIG_CACHE
    strategies = config.get("strategies") if isinstance(config, dict) else None
    # A YAML key with no value ("strategies:") parses to None, which the
    # `.get(key, {})` default does not cover.
    if not isinstance(strategies, dict):
        return {}

    # Requested intent first, then the FACT fallback; skip null entries.
    for key in (intent, "FACT"):
        candidate = strategies.get(key)
        if isinstance(candidate, dict):
            return candidate
    return {}
# --- Dependencies ---
def get_llm_service():
    """Create and return a new LLMService (used as a FastAPI dependency)."""
    service = LLMService()
    return service
def get_retriever():
    """Create and return a new Retriever (used as a FastAPI dependency)."""
    retriever = Retriever()
    return retriever
# --- Logic ---
def _build_enriched_context(hits: List[QueryHit]) -> str:
    """Build the 'rich context' string from the retrieved hits.

    Each hit becomes one numbered section containing the note id, the
    upper-cased note type, the total score and the text content. Sections
    are separated by a blank line.

    Args:
        hits: Retrieved hits, in the order they should appear.

    Returns:
        The concatenated context string; empty when *hits* is empty.
    """
    context_parts = []
    for i, hit in enumerate(hits, 1):
        source = hit.source or {}

        # 1. Extract the content: first non-empty text field wins.
        content = (
            source.get("text")
            or source.get("content")
            or source.get("page_content")
            or source.get("chunk_text")
            or "[Kein Textinhalt verfügbar]"
        )

        # 2. Metadata for "context intelligence".
        title = hit.note_id or "Unbekannte Notiz"
        # A `.get("type", "unknown")` default only covers a missing key; a
        # stored null or non-string value would make `.upper()` raise.
        note_type = str(source.get("type") or "unknown").upper()

        # 3. Formatting.
        context_parts.append(
            f"### QUELLE {i}: {title}\n"
            f"TYP: [{note_type}] (Score: {hit.total_score:.2f})\n"
            f"INHALT:\n{content}\n"
        )
    return "\n\n".join(context_parts)
async def _classify_intent(query: str, llm: LLMService) -> str:
    """
    WP-06: Intent detection via a simple keyword heuristic (chosen for speed).

    Returns "DECISION" when the lower-cased query contains any of the
    decision keywords as a substring, otherwise "FACT". The `llm` argument
    is accepted but not used by this heuristic.

    TODO: Move keywords to config if needed later.
    """
    # Performance optimisation: keyword lookup instead of an LLM call.
    decision_markers = (
        "soll ich",
        "meinung",
        "besser",
        "empfehlung",
        "strategie",
        "entscheidung",
        "wert",
        "prinzip",
    )
    lowered = query.lower()
    for marker in decision_markers:
        if marker in lowered:
            return "DECISION"
    return "FACT"
@router.post("/", response_model=ChatResponse)
async def chat_endpoint(
    request: ChatRequest,
    llm: LLMService = Depends(get_llm_service),
    retriever: Retriever = Depends(get_retriever)
):
    """Answer a chat message using retrieval plus LLM generation.

    Steps: classify the intent, look up the configured strategy, run the
    primary hybrid retrieval, optionally run a second retrieval restricted
    to the strategy's ``inject_types``, build the context string and ask
    the LLM for an answer.

    Args:
        request: Incoming chat request (message, top_k, explain).
        llm: LLM service, injected by FastAPI.
        retriever: Retriever, injected by FastAPI.

    Returns:
        A ChatResponse with the answer, the hits used as sources, the
        measured latency in milliseconds and the detected intent.

    Raises:
        HTTPException: Status 500 when any step fails.
    """
    # perf_counter is monotonic, so the latency cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()
    query_id = str(uuid.uuid4())
    logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")
    try:
        # 1. Intent detection
        intent = await _classify_intent(request.message, llm)
        logger.info(f"[{query_id}] Detected Intent: {intent}")

        # Load the strategy from the config (late binding)
        strategy = get_decision_strategy(intent)
        inject_types = strategy.get("inject_types", [])
        prompt_key = strategy.get("prompt_template", "rag_template")
        prepend_instr = strategy.get("prepend_instruction", "")

        # 2. Primary retrieval (facts)
        query_req = QueryRequest(
            query=request.message,
            mode="hybrid",
            top_k=request.top_k,
            explain=request.explain
        )
        retrieve_result = await retriever.search(query_req)
        # Work on a copy so that merging strategic hits below does not
        # mutate the list owned by the retriever's result object.
        hits = list(retrieve_result.results)

        # 3. Strategic retrieval (configurable)
        if inject_types:
            logger.info(f"[{query_id}] Executing Strategic Retrieval for types: {inject_types}...")
            strategy_req = QueryRequest(
                query=request.message,
                mode="hybrid",
                top_k=3,
                filters={"type": inject_types},  # dynamic list from the YAML config
                explain=False
            )
            strategy_result = await retriever.search(strategy_req)

            # Merge results, de-duplicating on node_id. The set is updated
            # as hits are added so duplicates inside the strategic result
            # are dropped as well.
            seen_ids = {h.node_id for h in hits}
            for strat_hit in strategy_result.results:
                if strat_hit.node_id not in seen_ids:
                    seen_ids.add(strat_hit.node_id)
                    hits.append(strat_hit)

        # 4. Context building
        if not hits:
            context_str = "Keine relevanten Notizen gefunden."
        else:
            context_str = _build_enriched_context(hits)

        # 5. Generation setup
        # NOTE(review): `template` is looked up but never passed to the LLM
        # call below, so the configured prompt_template currently only shows
        # up in the log line. Wiring it in needs the LLMService API — confirm.
        template = llm.prompts.get(prompt_key, "{context_str}\n\n{query}")

        # Inject the configured instruction (if any) in front of the context
        if prepend_instr:
            context_str = f"{prepend_instr}\n\n{context_str}"

        logger.info(f"[{query_id}] Sending to LLM (Intent: {intent}, Template: {prompt_key})...")
        answer_text = await llm.generate_rag_response(
            query=request.message,
            context_str=context_str
        )

        # 6. Response
        duration_ms = int((time.perf_counter() - start_time) * 1000)
        return ChatResponse(
            query_id=query_id,
            answer=answer_text,
            sources=hits,
            latency_ms=duration_ms,
            intent=intent
        )
    except HTTPException:
        # Keep the status code of deliberate HTTP errors instead of
        # re-wrapping them as a generic 500.
        raise
    except Exception as e:
        logger.error(f"Error in chat endpoint: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e