"""
app/routers/chat.py — RAG endpoint (WP-05 final audit version).

Purpose: connects retrieval with LLM generation.
Enriched context: injects note types and metadata into the prompt so the
LLM understands complex relationships (e.g. decisions).
"""
from fastapi import APIRouter, HTTPException, Depends
from typing import List
import time
import uuid
import logging

from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
from app.services.llm_service import LLMService
from app.core.retriever import Retriever

router = APIRouter()
logger = logging.getLogger(__name__)


def get_llm_service():
    """FastAPI dependency factory for the LLM service."""
    return LLMService()


def get_retriever():
    """FastAPI dependency factory for the retriever."""
    return Retriever()


def _build_enriched_context(hits: List[QueryHit]) -> str:
    """
    Build a "rich context" string for the LLM prompt.

    Instead of plain text, metadata (type, score) is injected so the LLM
    understands the semantic role of each snippet.
    """
    context_parts = []
    for i, hit in enumerate(hits, 1):
        source = hit.source or {}

        # 1. Extract content (robust: checks all common field names).
        content = (
            source.get("text")
            or source.get("content")
            or source.get("page_content")
            or source.get("chunk_text")
            or "[Kein Textinhalt verfügbar]"
        )

        # 2. Metadata for "context intelligence".
        title = hit.note_id or "Unbekannte Notiz"
        # Upper-case the type (e.g. "DECISION") so the LLM recognizes it as
        # a signal.  `or "unknown"` also covers an explicit None value in
        # the source dict, which `.get("type", "unknown")` would not —
        # `None.upper()` would raise AttributeError.
        note_type = (source.get("type") or "unknown").upper()

        # 3. Format as a structured document for the LLM.
        entry = (
            f"### QUELLE {i}: {title}\n"
            f"TYP: [{note_type}] (Score: {hit.total_score:.2f})\n"
            f"INHALT:\n{content}\n"
        )
        context_parts.append(entry)

    return "\n\n".join(context_parts)


@router.post("/", response_model=ChatResponse)
async def chat_endpoint(
    request: ChatRequest,
    llm: LLMService = Depends(get_llm_service),
    retriever: Retriever = Depends(get_retriever)
):
    """
    RAG chat endpoint: retrieve relevant notes, build an enriched context
    string, and let the LLM generate an answer grounded in those sources.

    Raises:
        HTTPException(500): on any unexpected failure in the pipeline.
    """
    # perf_counter is monotonic — unlike time.time(), the latency value
    # cannot jump or go negative if the wall clock is adjusted mid-request.
    start_time = time.perf_counter()
    query_id = str(uuid.uuid4())
    # Lazy %-style args: the message slice is only formatted if INFO is enabled.
    logger.info("Chat request [%s]: %s...", query_id, request.message[:50])

    try:
        # 1. Retrieval (force hybrid mode so graph neighbours are used).
        query_req = QueryRequest(
            query=request.message,
            mode="hybrid",  # IMPORTANT: hybrid mode pulls in graph neighbours
            top_k=request.top_k,
            explain=request.explain
        )
        retrieve_result = await retriever.search(query_req)
        hits = retrieve_result.results

        # 2. Context building (enriched).
        if not hits:
            logger.info("[%s] No hits found.", query_id)
            context_str = "Keine relevanten Notizen gefunden."
        else:
            context_str = _build_enriched_context(hits)

        # 3. Generation.
        logger.info(
            "[%s] Context built with %d chunks. Sending to LLM...",
            query_id, len(hits)
        )
        answer_text = await llm.generate_rag_response(
            query=request.message,
            context_str=context_str
        )

        # 4. Response.
        duration_ms = int((time.perf_counter() - start_time) * 1000)
        logger.info("[%s] Completed in %dms", query_id, duration_ms)

        return ChatResponse(
            query_id=retrieve_result.query_id,
            answer=answer_text,
            sources=hits,
            latency_ms=duration_ms
        )

    except HTTPException:
        # Deliberate HTTP errors (e.g. from dependencies) keep their
        # original status code instead of being re-wrapped as 500.
        raise
    except Exception as e:
        logger.error("Error in chat endpoint: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e