debug
This commit is contained in:
parent
9c4696e9d5
commit
b39663408c
|
|
@ -1,15 +1,8 @@
|
||||||
"""
|
"""
|
||||||
app/routers/chat.py — RAG Endpunkt (WP-05)
|
app/routers/chat.py — RAG Endpunkt (WP-05)
|
||||||
|
|
||||||
Zweck:
|
|
||||||
Verbindet Retrieval (WP-04) mit LLM-Generation (WP-05).
|
|
||||||
1. Empfängt User-Frage.
|
|
||||||
2. Sucht relevante Chunks (Retriever).
|
|
||||||
3. Baut Kontext-String.
|
|
||||||
4. Generiert Antwort via Ollama.
|
|
||||||
|
|
||||||
Version:
|
Version:
|
||||||
0.1.0
|
0.1.1 (Debug-Logging enabled)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException, Depends
|
from fastapi import APIRouter, HTTPException, Depends
|
||||||
|
|
@ -21,13 +14,11 @@ import logging
|
||||||
from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
|
from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
|
||||||
from app.services.llm_service import LLMService
|
from app.services.llm_service import LLMService
|
||||||
# Annahme: Der Retriever aus WP-04 liegt hier.
|
# Annahme: Der Retriever aus WP-04 liegt hier.
|
||||||
# Falls Import-Fehler: Bitte Pfad prüfen (z.B. app.services.retriever oder app.core.retriever)
|
|
||||||
from app.core.retriever import Retriever
|
from app.core.retriever import Retriever
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Dependency für Services (Singletons oder Factory wäre sauberer, hier pragmatisch instanziiert)
|
|
||||||
def get_llm_service():
|
def get_llm_service():
|
||||||
return LLMService()
|
return LLMService()
|
||||||
|
|
||||||
|
|
@ -37,18 +28,24 @@ def get_retriever():
|
||||||
def _build_context_from_hits(hits: List[QueryHit]) -> str:
|
def _build_context_from_hits(hits: List[QueryHit]) -> str:
|
||||||
"""
|
"""
|
||||||
Formatiert die Suchtreffer zu einem String für den Prompt.
|
Formatiert die Suchtreffer zu einem String für den Prompt.
|
||||||
Extrahiert Text aus hit.source (wo der Chunk-Inhalt liegt).
|
|
||||||
"""
|
"""
|
||||||
context_parts = []
|
context_parts = []
|
||||||
for i, hit in enumerate(hits, 1):
|
for i, hit in enumerate(hits, 1):
|
||||||
# Wir versuchen, den Text aus verschiedenen gängigen Feldern zu holen
|
|
||||||
source = hit.source or {}
|
source = hit.source or {}
|
||||||
content = source.get("text") or source.get("content") or "No text content available."
|
# Wir probieren alle möglichen Felder, in denen Text stecken könnte
|
||||||
|
content = (
|
||||||
|
source.get("text") or
|
||||||
|
source.get("content") or
|
||||||
|
source.get("page_content") or
|
||||||
|
source.get("chunk_text") or # Oft verwendet
|
||||||
|
"[[LEERER INHALT - PAYLOAD PRÜFEN]]"
|
||||||
|
)
|
||||||
|
|
||||||
title = hit.note_id or "Unknown Note"
|
title = hit.note_id or "Unknown Note"
|
||||||
|
|
||||||
# Formatierung:
|
# Debug Log für jeden Hit
|
||||||
# [1] Titel der Notiz (Score: 0.85)
|
logger.info(f"Building Context [{i}]: ID={hit.node_id} Content-Length={len(str(content))}")
|
||||||
# Inhalt...
|
|
||||||
entry = (
|
entry = (
|
||||||
f"SOURCE [{i}]: {title} (Score: {hit.total_score:.2f})\n"
|
f"SOURCE [{i}]: {title} (Score: {hit.total_score:.2f})\n"
|
||||||
f"CONTENT: {content}\n"
|
f"CONTENT: {content}\n"
|
||||||
|
|
@ -69,17 +66,14 @@ async def chat_endpoint(
|
||||||
logger.info(f"Chat request [{query_id}]: {request.message}")
|
logger.info(f"Chat request [{query_id}]: {request.message}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 1. Retrieval: Wir nutzen den existierenden Retriever
|
# 1. Retrieval
|
||||||
# Wir mappen ChatRequest auf QueryRequest (WP-04 Logik)
|
|
||||||
query_req = QueryRequest(
|
query_req = QueryRequest(
|
||||||
query=request.message,
|
query=request.message,
|
||||||
mode="hybrid", # Hybrid ist am robustesten für RAG
|
mode="hybrid",
|
||||||
top_k=request.top_k,
|
top_k=request.top_k,
|
||||||
explain=request.explain # Traceability weitergeben
|
explain=request.explain
|
||||||
)
|
)
|
||||||
|
|
||||||
# Retrieval ausführen (retriever.search erwartet QueryRequest)
|
|
||||||
# Hinweis: retrieve_result ist vom Typ QueryResponse (aus DTO)
|
|
||||||
retrieve_result = await retriever.search(query_req)
|
retrieve_result = await retriever.search(query_req)
|
||||||
hits = retrieve_result.results
|
hits = retrieve_result.results
|
||||||
|
|
||||||
|
|
@ -89,19 +83,21 @@ async def chat_endpoint(
|
||||||
context_str = "Keine relevanten Notizen gefunden."
|
context_str = "Keine relevanten Notizen gefunden."
|
||||||
else:
|
else:
|
||||||
context_str = _build_context_from_hits(hits)
|
context_str = _build_context_from_hits(hits)
|
||||||
|
|
||||||
|
# WICHTIG: Wir loggen den ersten Teil des Kontextes, um zu sehen, was das LLM bekommt
|
||||||
|
logger.info(f"--- LLM CONTEXT PREVIEW ---\n{context_str[:500]}\n--- END PREVIEW ---")
|
||||||
|
|
||||||
# 3. LLM Generation
|
# 3. LLM Generation
|
||||||
logger.info(f"[{query_id}] Generating answer with {len(hits)} context chunks...")
|
|
||||||
answer_text = await llm.generate_rag_response(
|
answer_text = await llm.generate_rag_response(
|
||||||
query=request.message,
|
query=request.message,
|
||||||
context_str=context_str
|
context_str=context_str
|
||||||
)
|
)
|
||||||
|
|
||||||
# 4. Response bauen
|
# 4. Response
|
||||||
duration_ms = int((time.time() - start_time) * 1000)
|
duration_ms = int((time.time() - start_time) * 1000)
|
||||||
|
|
||||||
return ChatResponse(
|
return ChatResponse(
|
||||||
query_id=retrieve_result.query_id, # Wir nutzen die ID vom Retriever für Konsistenz
|
query_id=retrieve_result.query_id,
|
||||||
answer=answer_text,
|
answer=answer_text,
|
||||||
sources=hits,
|
sources=hits,
|
||||||
latency_ms=duration_ms
|
latency_ms=duration_ms
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user