"""
|
|
app/routers/chat.py — RAG Endpunkt (WP-05 Final Audit Version)
|
|
|
|
Zweck:
|
|
Verbindet Retrieval mit LLM-Generation.
|
|
Enriched Context: Fügt Typen und Metadaten in den Prompt ein,
|
|
damit das LLM komplexe Zusammenhänge (z.B. Decisions) versteht.
|
|
"""
|
|
|
|

from fastapi import APIRouter, HTTPException, Depends
from typing import List
import time
import uuid
import logging

from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
from app.services.llm_service import LLMService
from app.core.retriever import Retriever

router = APIRouter()
logger = logging.getLogger(__name__)


def get_llm_service():
    return LLMService()


def get_retriever():
    return Retriever()
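
# One benefit of the factory functions above: tests can swap in fakes via
# FastAPI's dependency_overrides. A minimal sketch (FakeLLMService and
# FakeRetriever are hypothetical test doubles, not part of this codebase):
#
#   app.dependency_overrides[get_llm_service] = lambda: FakeLLMService()
#   app.dependency_overrides[get_retriever] = lambda: FakeRetriever()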


def _build_enriched_context(hits: List[QueryHit]) -> str:
    """
    Builds a 'rich context' string.
    Instead of plain text only, we inject metadata (type, tags) so the LLM
    understands the semantic role of each snippet.
    """
    context_parts = []

    for i, hit in enumerate(hits, 1):
        source = hit.source or {}

        # 1. Extract the content (robust: checks all common field names)
        content = (
            source.get("text") or
            source.get("content") or
            source.get("page_content") or
            source.get("chunk_text") or
            "[No text content available]"
        )

        # 2. Metadata for "context intelligence"
        title = hit.note_id or "Unknown note"
        # Upper-case the type (e.g. "DECISION") so the LLM picks it up as a signal
        note_type = source.get("type", "unknown").upper()

        # 3. Format as a structured document for the LLM
        entry = (
            f"### SOURCE {i}: {title}\n"
            f"TYPE: [{note_type}] (Score: {hit.total_score:.2f})\n"
            f"CONTENT:\n{content}\n"
        )
        context_parts.append(entry)

    return "\n\n".join(context_parts)
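
# Illustrative example (assumed hit shape): a hit with note_id "adr-0007",
# source {"type": "decision", "text": "We chose Postgres over MySQL ..."}
# and total_score 0.87 would be rendered as:
#
#   ### SOURCE 1: adr-0007
#   TYPE: [DECISION] (Score: 0.87)
#   CONTENT:
#   We chose Postgres over MySQL ...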


@router.post("/", response_model=ChatResponse)
async def chat_endpoint(
    request: ChatRequest,
    llm: LLMService = Depends(get_llm_service),
    retriever: Retriever = Depends(get_retriever)
):
    start_time = time.time()
    query_id = str(uuid.uuid4())

    logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")

    try:
        # 1. Retrieval (force hybrid mode so graph neighbors are included)
        query_req = QueryRequest(
            query=request.message,
            mode="hybrid",  # IMPORTANT: hybrid mode pulls in graph neighbors
            top_k=request.top_k,
            explain=request.explain
        )

        retrieve_result = await retriever.search(query_req)
        hits = retrieve_result.results

        # 2. Context building (enriched)
        if not hits:
            logger.info(f"[{query_id}] No hits found.")
            context_str = "No relevant notes found."
        else:
            context_str = _build_enriched_context(hits)

        # 3. Generation
        logger.info(f"[{query_id}] Context built with {len(hits)} chunks. Sending to LLM...")
        answer_text = await llm.generate_rag_response(
            query=request.message,
            context_str=context_str
        )

        # 4. Response
        duration_ms = int((time.time() - start_time) * 1000)
        logger.info(f"[{query_id}] Completed in {duration_ms}ms")

        return ChatResponse(
            query_id=retrieve_result.query_id,
            answer=answer_text,
            sources=hits,
            latency_ms=duration_ms
        )

    except Exception as e:
        logger.error(f"Error in chat endpoint: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
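
# Usage sketch (illustrative; assumes this router is mounted under a
# hypothetical "/chat" prefix, while the JSON field names match ChatRequest
# as used above):
#
#   from fastapi import FastAPI
#   from fastapi.testclient import TestClient
#   from app.routers.chat import router
#
#   app = FastAPI()
#   app.include_router(router, prefix="/chat")
#   client = TestClient(app)
#   resp = client.post("/chat/", json={"message": "Why hybrid retrieval?",
#                                      "top_k": 5, "explain": False})
#   print(resp.json()["answer"])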