WP10 #6

Merged
Lars merged 9 commits from WP10 into main 2025-12-10 10:29:10 +01:00
2 changed files with 77 additions and 32 deletions
Showing only changes of commit 76ea8e3350 - Show all commits

View File

@@ -1,6 +1,12 @@
""" """
app/routers/chat.py RAG Endpunkt (WP-06 Hybrid Router v3) app/routers/chat.py RAG Endpunkt (WP-06 Hybrid Router + WP-04c Feedback)
Update: Transparenz über Intent-Source (Keyword vs. LLM). Version: 2.3.2 (Merged Stability Patch)
Features:
- Hybrid Intent Router (Keyword + LLM)
- Strategic Retrieval (Late Binding via Config)
- Context Enrichment (Payload/Source Fallback)
- Data Flywheel (Feedback Logging Integration)
""" """
from fastapi import APIRouter, HTTPException, Depends from fastapi import APIRouter, HTTPException, Depends
@@ -15,6 +21,8 @@ from app.config import get_settings
from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
from app.services.llm_service import LLMService from app.services.llm_service import LLMService
from app.core.retriever import Retriever from app.core.retriever import Retriever
# [MERGE] Integration Feedback Service (WP-04c)
from app.services.feedback_service import log_search
router = APIRouter() router = APIRouter()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -77,7 +85,7 @@ def _build_enriched_context(hits: List[QueryHit]) -> str:
) )
title = hit.note_id or "Unbekannt" title = hit.note_id or "Unbekannt"
# FIX: Wir holen den Typ aus Payload oder Source (Fallback) # [FIX] Robustes Auslesen des Typs (Payload > Source > Unknown)
payload = hit.payload or {} payload = hit.payload or {}
note_type = payload.get("type") or source.get("type", "unknown") note_type = payload.get("type") or source.get("type", "unknown")
note_type = str(note_type).upper() note_type = str(note_type).upper()
@@ -173,7 +181,7 @@ async def chat_endpoint(
retrieve_result = await retriever.search(query_req) retrieve_result = await retriever.search(query_req)
hits = retrieve_result.results hits = retrieve_result.results
# 3. Strategic Retrieval # 3. Strategic Retrieval (WP-06 Kernfeature)
if inject_types: if inject_types:
logger.info(f"[{query_id}] Executing Strategic Retrieval for types: {inject_types}...") logger.info(f"[{query_id}] Executing Strategic Retrieval for types: {inject_types}...")
strategy_req = QueryRequest( strategy_req = QueryRequest(
@@ -207,18 +215,37 @@ async def chat_endpoint(
logger.info(f"[{query_id}] Sending to LLM (Intent: {intent}, Template: {prompt_key})...") logger.info(f"[{query_id}] Sending to LLM (Intent: {intent}, Template: {prompt_key})...")
# System-Prompt separat übergeben # System-Prompt separat übergeben (WP-06a Fix)
answer_text = await llm.generate_raw_response(prompt=final_prompt, system=system_prompt) answer_text = await llm.generate_raw_response(prompt=final_prompt, system=system_prompt)
duration_ms = int((time.time() - start_time) * 1000) duration_ms = int((time.time() - start_time) * 1000)
# 6. Logging (Fire & Forget) - [MERGE POINT]
# Wir loggen alles für das Data Flywheel (WP-08 Self-Tuning)
try:
log_search(
query_id=query_id,
query_text=request.message,
results=hits,
mode="chat_rag",
metadata={
"intent": intent,
"intent_source": intent_source,
"generated_answer": answer_text,
"model": llm.settings.LLM_MODEL
}
)
except Exception as e:
logger.error(f"Logging failed: {e}")
# 7. Response
return ChatResponse( return ChatResponse(
query_id=query_id, query_id=query_id,
answer=answer_text, answer=answer_text,
sources=hits, sources=hits,
latency_ms=duration_ms, latency_ms=duration_ms,
intent=intent, intent=intent,
intent_source=intent_source # Source durchreichen intent_source=intent_source
) )
except Exception as e: except Exception as e:

View File

@@ -2,13 +2,18 @@
app/services/feedback_service.py app/services/feedback_service.py
Service zum Loggen von Suchanfragen und Feedback (WP-04c). Service zum Loggen von Suchanfragen und Feedback (WP-04c).
Speichert Daten als JSONL für späteres Self-Tuning (WP-08). Speichert Daten als JSONL für späteres Self-Tuning (WP-08).
Version: 1.1 (Chat-Support)
""" """
import json import json
import os import os
import time import time
import logging
from pathlib import Path from pathlib import Path
from typing import Dict, Any, List from typing import Dict, Any, List, Union
from app.models.dto import QueryRequest, QueryResponse, FeedbackRequest from app.models.dto import QueryRequest, QueryResponse, FeedbackRequest, QueryHit
logger = logging.getLogger(__name__)
# Pfad für Logs (lokal auf dem Beelink/PC) # Pfad für Logs (lokal auf dem Beelink/PC)
LOG_DIR = Path("data/logs") LOG_DIR = Path("data/logs")
@@ -19,18 +24,35 @@ def _ensure_log_dir():
if not LOG_DIR.exists(): if not LOG_DIR.exists():
os.makedirs(LOG_DIR, exist_ok=True) os.makedirs(LOG_DIR, exist_ok=True)
def log_search(req: QueryRequest, res: QueryResponse): def _append_jsonl(file_path: Path, data: dict):
try:
with open(file_path, "a", encoding="utf-8") as f:
f.write(json.dumps(data, ensure_ascii=False) + "\n")
except Exception as e:
logger.error(f"Failed to write log: {e}")
def log_search(
query_id: str,
query_text: str,
results: List[QueryHit],
mode: str = "unknown",
metadata: Dict[str, Any] = None
):
""" """
Speichert den "Snapshot" der Suche. Generische Logging-Funktion für Suche UND Chat.
WICHTIG: Wir speichern die Scores (Breakdown), damit wir später wissen,
warum das System so entschieden hat. Args:
query_id: UUID der Anfrage.
query_text: User-Eingabe.
results: Liste der Treffer (QueryHit Objekte).
mode: z.B. "semantic", "hybrid", "chat_rag".
metadata: Zusätzliche Infos (z.B. generierte Antwort, Intent).
""" """
_ensure_log_dir() _ensure_log_dir()
# Wir reduzieren die Datenmenge etwas (z.B. keine vollen Texte)
hits_summary = [] hits_summary = []
for hit in res.results: for hit in results:
# Falls Explanation an war, speichern wir den Breakdown, sonst die Scores # Pydantic Model Dump für saubere Serialisierung
breakdown = None breakdown = None
if hit.explanation and hit.explanation.breakdown: if hit.explanation and hit.explanation.breakdown:
breakdown = hit.explanation.breakdown.model_dump() breakdown = hit.explanation.breakdown.model_dump()
@@ -39,25 +61,24 @@ def log_search(req: QueryRequest, res: QueryResponse):
"node_id": hit.node_id, "node_id": hit.node_id,
"note_id": hit.note_id, "note_id": hit.note_id,
"total_score": hit.total_score, "total_score": hit.total_score,
"breakdown": breakdown, # Wichtig für Training! "breakdown": breakdown,
"rank_semantic": hit.semantic_score, "rank_semantic": hit.semantic_score,
"rank_edge": hit.edge_bonus "rank_edge": hit.edge_bonus,
"type": hit.source.get("type") if hit.source else "unknown"
}) })
entry = { entry = {
"timestamp": time.time(), "timestamp": time.time(),
"query_id": res.query_id, "query_id": query_id,
"query_text": req.query, "query_text": query_text,
"mode": req.mode, "mode": mode,
"top_k": req.top_k, "hits_count": len(hits_summary),
"hits": hits_summary "hits": hits_summary,
"metadata": metadata or {}
} }
try: _append_jsonl(SEARCH_LOG_FILE, entry)
with open(SEARCH_LOG_FILE, "a", encoding="utf-8") as f: logger.info(f"Logged search/chat interaction {query_id}")
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
except Exception as e:
print(f"ERROR logging search: {e}")
def log_feedback(fb: FeedbackRequest): def log_feedback(fb: FeedbackRequest):
""" """
@@ -73,8 +94,5 @@ def log_feedback(fb: FeedbackRequest):
"comment": fb.comment "comment": fb.comment
} }
try: _append_jsonl(FEEDBACK_LOG_FILE, entry)
with open(FEEDBACK_LOG_FILE, "a", encoding="utf-8") as f: logger.info(f"Logged feedback for {fb.query_id}")
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
except Exception as e:
print(f"ERROR logging feedback: {e}")