mindnet/app/routers/chat.py
2025-12-08 14:27:17 +01:00

114 lines
3.6 KiB
Python

"""
app/routers/chat.py — RAG endpoint (WP-05 final audit version)
Purpose:
Connects retrieval with LLM generation.
Enriched context: injects types and metadata into the prompt so the
LLM understands complex relationships (e.g. decisions).
"""
from fastapi import APIRouter, HTTPException, Depends
from typing import List
import time
import uuid
import logging
from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
from app.services.llm_service import LLMService
from app.core.retriever import Retriever
router = APIRouter()
logger = logging.getLogger(__name__)
def get_llm_service():
    """FastAPI dependency factory: provides a fresh LLMService per request."""
    return LLMService()
def get_retriever():
    """FastAPI dependency factory: provides a fresh Retriever per request."""
    return Retriever()
def _build_enriched_context(hits: List[QueryHit]) -> str:
    """
    Build a 'rich context' string for the LLM prompt.

    Instead of plain text only, metadata (type, score) is injected so the
    LLM understands the semantic role of each snippet (e.g. a DECISION).

    Args:
        hits: Ranked retrieval results. Each hit's ``source`` payload may
              store its text under any of several common field names.

    Returns:
        One structured document section per hit, joined by blank lines.
    """
    context_parts = []
    for i, hit in enumerate(hits, 1):
        source = hit.source or {}
        # 1. Extract content (robust: checks all commonly used fields).
        content = (
            source.get("text") or
            source.get("content") or
            source.get("page_content") or
            source.get("chunk_text") or
            "[Kein Textinhalt verfügbar]"
        )
        # 2. Metadata for "context intelligence".
        title = hit.note_id or "Unbekannte Notiz"
        # Uppercase type (e.g. "DECISION") so the LLM reads it as a signal.
        # BUGFIX: use `or` instead of a .get() default — a .get() default
        # only applies when the key is MISSING; an explicit None value
        # would previously crash with AttributeError on .upper().
        note_type = (source.get("type") or "unknown").upper()
        # 3. Format as a structured document for the LLM.
        entry = (
            f"### QUELLE {i}: {title}\n"
            f"TYP: [{note_type}] (Score: {hit.total_score:.2f})\n"
            f"INHALT:\n{content}\n"
        )
        context_parts.append(entry)
    return "\n\n".join(context_parts)
@router.post("/", response_model=ChatResponse)
async def chat_endpoint(
    request: ChatRequest,
    llm: LLMService = Depends(get_llm_service),
    retriever: Retriever = Depends(get_retriever)
):
    """
    RAG chat endpoint: retrieve relevant notes, build an enriched context
    and have the LLM generate the final answer.

    Raises:
        HTTPException: 500 with the underlying error message on any failure.
    """
    t0 = time.time()
    trace_id = str(uuid.uuid4())
    logger.info(f"Chat request [{trace_id}]: {request.message[:50]}...")
    try:
        # 1. Retrieval (force hybrid mode so graph neighbours are included).
        retrieve_result = await retriever.search(QueryRequest(
            query=request.message,
            mode="hybrid",  # IMPORTANT: hybrid mode for graph neighbours
            top_k=request.top_k,
            explain=request.explain
        ))
        hits = retrieve_result.results
        # 2. Context building (enriched).
        if hits:
            context_str = _build_enriched_context(hits)
        else:
            logger.info(f"[{trace_id}] No hits found.")
            context_str = "Keine relevanten Notizen gefunden."
        # 3. Generation.
        logger.info(f"[{trace_id}] Context built with {len(hits)} chunks. Sending to LLM...")
        answer_text = await llm.generate_rag_response(
            query=request.message,
            context_str=context_str
        )
        # 4. Response. Note: the response carries the retriever's query_id;
        # trace_id above is only a local logging correlation id.
        elapsed_ms = int((time.time() - t0) * 1000)
        logger.info(f"[{trace_id}] Completed in {elapsed_ms}ms")
        return ChatResponse(
            query_id=retrieve_result.query_id,
            answer=answer_text,
            sources=hits,
            latency_ms=elapsed_ms
        )
    except Exception as e:
        logger.error(f"Error in chat endpoint: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))