# mindnet/app/routers/chat.py
#
# 353 lines
# 13 KiB
# Python

"""
app/routers/chat.py — RAG endpoint (WP-06 Hybrid Router + WP-07 Interview Mode)
Version: 2.4.1 (Fix: Type-based Intent Detection)
Features:
- Hybrid Intent Router (Keyword + LLM)
- Strategic Retrieval (Late Binding via Config)
- Interview Loop (Schema-driven Data Collection)
- Context Enrichment (Payload/Source Fallback)
- Data Flywheel (Feedback Logging Integration)
- NEW: Loads detection_keywords from types.yaml for precise detection.
"""
from fastapi import APIRouter, HTTPException, Depends
from typing import List, Dict, Any, Optional
import time
import uuid
import logging
import yaml
import os
from pathlib import Path
from app.config import get_settings
from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
from app.services.llm_service import LLMService
from app.core.retriever import Retriever
from app.services.feedback_service import log_search
# FastAPI router on which the chat endpoint of this module is registered.
router = APIRouter()
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# --- Helper: Config Loader ---
# Lazily filled caches for the parsed YAML configs; None means "not loaded
# yet" (they are populated by get_full_config() / get_types_config()).
_DECISION_CONFIG_CACHE: Optional[Dict[str, Any]] = None
_TYPES_CONFIG_CACHE: Optional[Dict[str, Any]] = None
def _load_decision_config() -> Dict[str, Any]:
    """Load the decision-engine configuration from ``settings.DECISION_CONFIG_PATH``.

    Returns:
        The parsed YAML mapping. A minimal default config (only an empty
        ``FACT`` strategy) is returned instead when the file does not exist,
        cannot be read or parsed, or does not contain a mapping at top level
        (for example an empty file).
    """
    settings = get_settings()
    path = Path(settings.DECISION_CONFIG_PATH)
    default_config = {
        "strategies": {
            "FACT": {"trigger_keywords": []}
        }
    }
    if not path.exists():
        logger.warning(f"Decision config not found at {path}, using defaults.")
        return default_config
    try:
        with open(path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
    except Exception as e:
        logger.error(f"Failed to load decision config: {e}")
        return default_config
    if not isinstance(loaded, dict):
        # safe_load yields None for an empty document (or a scalar/list for
        # malformed content); callers call .get() on the result, so anything
        # but a mapping must be replaced by the defaults.
        logger.warning(f"Decision config at {path} is not a mapping, using defaults.")
        return default_config
    return loaded
def _load_types_config() -> Dict[str, Any]:
    """Load types.yaml, which provides the keywords used for type detection.

    The file path is taken from the ``MINDNET_TYPES_FILE`` environment
    variable and defaults to ``config/types.yaml``.

    Returns:
        The parsed YAML mapping, or an empty dict when the file cannot be
        read or parsed or does not contain a mapping at top level.
    """
    path = os.getenv("MINDNET_TYPES_FILE", "config/types.yaml")
    try:
        with open(path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
    except Exception as e:
        # Loading stays best effort (an empty config is a valid fallback),
        # but the failure is logged so a broken types.yaml is not invisible.
        logger.warning(f"Failed to load types config from {path}: {e}")
        return {}
    # Empty documents (None) and non-mapping content are treated as "no types".
    return loaded if isinstance(loaded, dict) else {}
def get_full_config() -> Dict[str, Any]:
    """Return the decision configuration, loading it on first use.

    The result of ``_load_decision_config()`` is kept in the module-level
    ``_DECISION_CONFIG_CACHE`` and reused on subsequent calls.
    """
    global _DECISION_CONFIG_CACHE
    if _DECISION_CONFIG_CACHE is not None:
        return _DECISION_CONFIG_CACHE
    _DECISION_CONFIG_CACHE = _load_decision_config()
    return _DECISION_CONFIG_CACHE
def get_types_config() -> Dict[str, Any]:
    """Return the types configuration, loading it on first use.

    The result of ``_load_types_config()`` is kept in the module-level
    ``_TYPES_CONFIG_CACHE`` and reused on subsequent calls.
    """
    global _TYPES_CONFIG_CACHE
    if _TYPES_CONFIG_CACHE is not None:
        return _TYPES_CONFIG_CACHE
    _TYPES_CONFIG_CACHE = _load_types_config()
    return _TYPES_CONFIG_CACHE
def get_decision_strategy(intent: str) -> Dict[str, Any]:
    """Look up the strategy configured for ``intent``.

    Falls back to the ``FACT`` strategy when the intent has no entry under
    ``strategies`` (e.g. INTERVIEW missing from the decision config, although
    it is expected to be configured there), and to an empty dict when
    ``FACT`` is missing as well.
    """
    strategies = get_full_config().get("strategies", {})
    if intent in strategies:
        return strategies[intent]
    return strategies.get("FACT", {})
# --- Helper: Target Type Detection (WP-07) ---
def _detect_target_type(message: str, configured_schemas: Dict[str, Any]) -> str:
    """Guess which note type the user wants to create.

    Detection order (first match wins, all matches are case-insensitive
    substring checks against ``message``):
      1. ``detection_keywords`` of each type in types.yaml (priority).
      2. The keys of ``configured_schemas`` (except ``"default"``).
      3. A built-in German synonym mapping (legacy fallback).

    Args:
        message: The raw user message.
        configured_schemas: Mapping of schema key to schema definition; only
            the keys are used here. ``None`` is treated as empty.

    Returns:
        The matched type name / schema key, or ``"default"`` if nothing matched.
    """
    message_lower = message.lower()

    # 1. types.yaml detection_keywords (priority). Missing or null sections
    #    in the YAML are treated as empty instead of crashing.
    types_def = get_types_config().get("types") or {}
    for type_name, type_data in types_def.items():
        if not isinstance(type_data, dict):
            continue
        keywords = type_data.get("detection_keywords") or []
        if isinstance(keywords, str):
            # A single keyword written as a scalar instead of a list.
            keywords = [keywords]
        for kw in keywords:
            kw_lower = str(kw).lower()
            # Skip empty keywords: "" is a substring of every message and
            # would make this type match unconditionally.
            if kw_lower and kw_lower in message_lower:
                return type_name

    # 2. Direct match against the schema keys. The key is lowercased for the
    #    comparison because the message has been lowercased as well.
    for type_key in (configured_schemas or {}):
        if type_key == "default":
            continue
        key_lower = str(type_key).lower()
        if key_lower and key_lower in message_lower:
            return type_key

    # 3. Synonym mapping (legacy fallback).
    synonyms = {
        "projekt": "project", "vorhaben": "project",
        "entscheidung": "decision", "beschluss": "decision",
        "ziel": "goal",
        "erfahrung": "experience", "lektion": "experience",
        "wert": "value",
        "prinzip": "principle",
        "notiz": "default", "idee": "default"
    }
    for term, schema_key in synonyms.items():
        if term in message_lower:
            return schema_key
    return "default"
# --- Dependencies ---
def get_llm_service():
    """Dependency provider: build a new ``LLMService`` on every call."""
    service = LLMService()
    return service
def get_retriever():
    """Dependency provider: build a new ``Retriever`` on every call."""
    retriever = Retriever()
    return retriever
# --- Logic ---
def _build_enriched_context(hits: List[QueryHit]) -> str:
    """Render retrieval hits as the context text that is put into the prompt.

    For each hit (numbered from 1) an entry with title, type, score and
    content is produced:
      - content: first non-empty of ``source["text"]``, ``["content"]``,
        ``["page_content"]``, ``["chunk_text"]``, else ``"[Kein Text]"``.
      - title: ``hit.note_id`` or ``"Unbekannt"``.
      - type: ``payload["type"]``, else ``source["type"]``, else ``"unknown"``,
        upper-cased.

    Args:
        hits: Objects exposing ``source``, ``payload``, ``note_id`` and
            ``total_score``; ``source`` / ``payload`` may be ``None``.

    Returns:
        All entries joined by a blank line; ``""`` for an empty list.
    """
    context_parts = []
    for i, hit in enumerate(hits, 1):
        source = hit.source or {}
        payload = hit.payload or {}
        content = (
            source.get("text") or source.get("content") or
            source.get("page_content") or source.get("chunk_text") or
            "[Kein Text]"
        )
        title = hit.note_id or "Unbekannt"
        # Chain with `or` (not a .get default) so that a "type" key that is
        # present but null/empty still falls through to "unknown" instead of
        # being rendered as "NONE" or an empty label.
        note_type = str(
            payload.get("type") or source.get("type") or "unknown"
        ).upper()
        entry = (
            f"### QUELLE {i}: {title}\n"
            f"TYP: [{note_type}] (Score: {hit.total_score:.2f})\n"
            f"INHALT:\n{content}\n"
        )
        context_parts.append(entry)
    return "\n\n".join(context_parts)
def _matches_any_keyword(text_lower: str, keywords: Any) -> bool:
    """Return True if any non-empty keyword occurs in the lowercased text."""
    if isinstance(keywords, str):
        # A single keyword written as a scalar instead of a list.
        keywords = [keywords]
    for kw in keywords or []:
        kw_lower = str(kw).lower()
        # "" is a substring of everything; an empty keyword must not match.
        if kw_lower and kw_lower in text_lower:
            return True
    return False


async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
    """
    Hybrid router v4. Determines the intent for ``query`` in three stages:
    1. Decision keywords: ``trigger_keywords`` of every strategy except FACT.
    2. Type keywords: ``detection_keywords`` from types.yaml -> INTERVIEW.
    3. LLM fallback, only if ``llm_fallback_enabled`` is set and an
       ``llm_router_prompt`` is configured in the decision config settings.

    Returns:
        ``(intent, source)`` where ``source`` is a human-readable label that
        says which stage decided. Defaults to
        ``("FACT", "Default (No Match)")`` when nothing matched or the LLM
        call failed.
    """
    config = get_full_config()
    # `or {}` guards against sections that exist in the YAML but are null.
    strategies = config.get("strategies") or {}
    settings = config.get("settings") or {}
    query_lower = query.lower()

    # 1. FAST PATH A: strategy keywords (e.g. "Soll ich...")
    for intent_name, strategy in strategies.items():
        if intent_name == "FACT" or not isinstance(strategy, dict):
            continue
        if _matches_any_keyword(query_lower, strategy.get("trigger_keywords")):
            return intent_name, "Keyword (Strategy)"

    # 2. FAST PATH B: type keywords (e.g. "Projekt", "passiert") -> INTERVIEW.
    #    Any recognised type triggers the interview; the keywords come from
    #    the global types.yaml, schemas are not loaded here.
    types_def = get_types_config().get("types") or {}
    for type_name, type_data in types_def.items():
        if not isinstance(type_data, dict):
            continue
        if _matches_any_keyword(query_lower, type_data.get("detection_keywords")):
            return "INTERVIEW", f"Keyword (Type: {type_name})"

    # 3. SLOW PATH: LLM router
    router_prompt_template = settings.get("llm_router_prompt", "")
    if settings.get("llm_fallback_enabled", False) and router_prompt_template:
        prompt = router_prompt_template.replace("{query}", query)
        logger.info("Keywords failed. Asking LLM for Intent...")
        try:
            raw_response = await llm.generate_raw_response(prompt)
            llm_output_upper = (raw_response or "").upper()
            # Check INTERVIEW first (LLMs often phrase it as a "create" intent).
            if "INTERVIEW" in llm_output_upper or "CREATE" in llm_output_upper:
                return "INTERVIEW", "LLM Router"
            for strat_key in strategies:
                # Compare upper-cased: the LLM output has been upper-cased,
                # so a key with lowercase letters could never match otherwise.
                key_upper = str(strat_key).upper()
                if key_upper and key_upper in llm_output_upper:
                    return strat_key, "LLM Router"
        except Exception as e:
            # Routing is best effort; fall through to the FACT default.
            logger.error(f"Router LLM failed: {e}")
    return "FACT", "Default (No Match)"
def _resolve_interview_fields(target_type: str, strategy: Dict[str, Any]) -> List[Any]:
    """Return the schema fields to ask for when interviewing for ``target_type``.

    The ``schema`` list of the type in types.yaml is preferred. If it is
    missing or empty, the strategy's ``schemas`` section is used instead
    (entry for ``target_type``, else ``default``); an entry may be a dict
    with a ``fields`` list or a plain list.
    """
    type_def = (get_types_config().get("types") or {}).get(target_type) or {}
    fields_list = type_def.get("schema") if isinstance(type_def, dict) else None
    if fields_list:
        return fields_list
    configured_schemas = strategy.get("schemas") or {}
    fallback_schema = configured_schemas.get(target_type, configured_schemas.get("default"))
    if isinstance(fallback_schema, dict):
        return fallback_schema.get("fields") or []
    return fallback_schema or []


async def _retrieve_hits(request: ChatRequest, retriever: Retriever, inject_types: List[str]) -> List[QueryHit]:
    """Run the hybrid search for the request and merge in strategy-specific hits.

    When ``inject_types`` is non-empty, a second search (top 3, filtered to
    these types) is executed and its hits are appended unless a hit with the
    same ``node_id`` is already present.
    """
    query_req = QueryRequest(
        query=request.message,
        mode="hybrid",
        top_k=request.top_k,
        explain=request.explain
    )
    retrieve_result = await retriever.search(query_req)
    hits = list(retrieve_result.results or [])
    if inject_types:
        strategy_req = QueryRequest(
            query=request.message,
            mode="hybrid",
            top_k=3,
            filters={"type": inject_types},
            explain=False
        )
        strategy_result = await retriever.search(strategy_req)
        existing_ids = {h.node_id for h in hits}
        for strat_hit in strategy_result.results or []:
            if strat_hit.node_id not in existing_ids:
                # Track appended ids too, so duplicates inside the strategy
                # result itself are not added twice.
                existing_ids.add(strat_hit.node_id)
                hits.append(strat_hit)
    return hits


@router.post("/", response_model=ChatResponse)
async def chat_endpoint(
    request: ChatRequest,
    llm: LLMService = Depends(get_llm_service),
    retriever: Retriever = Depends(get_retriever)
):
    """Answer a chat message, either in interview mode or in RAG mode.

    Steps: classify the intent, load the matching strategy, build the prompt
    (interview: schema-driven, no retrieval; otherwise: retrieval context),
    generate the answer with the LLM, log the search and return the response.

    Raises:
        HTTPException: status 500 with the error text as detail when any
            step of the processing fails.
    """
    # perf_counter is monotonic and therefore suited for measuring a duration.
    start_time = time.perf_counter()
    query_id = str(uuid.uuid4())
    logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")
    try:
        # 1. Intent detection
        intent, intent_source = await _classify_intent(request.message, llm)
        logger.info(f"[{query_id}] Final Intent: {intent} via {intent_source}")

        # Strategy load
        strategy = get_decision_strategy(intent)
        prompt_key = strategy.get("prompt_template", "rag_template")

        if intent == "INTERVIEW":
            # --- INTERVIEW MODE ---
            # Determine which schema to use: the target type is detected from
            # the message (types.yaml keywords first), then its fields are
            # taken from types.yaml, falling back to the strategy's schemas.
            target_type = _detect_target_type(request.message, strategy.get("schemas") or {})
            fields_list = _resolve_interview_fields(target_type, strategy)
            logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")
            # One bullet per field; yields "" (no dangling bullet) when there
            # are no fields and tolerates non-string field entries.
            fields_str = "".join(f"\n- {field}" for field in fields_list)

            # Prompt assembly. The user message is inserted last so that
            # placeholder-like text typed by the user is not expanded by the
            # other replacements.
            template = llm.prompts.get(prompt_key, "")
            final_prompt = (
                template.replace("{context_str}", "Dialogverlauf...")
                .replace("{target_type}", target_type)
                .replace("{schema_fields}", fields_str)
                .replace("{schema_hint}", "")
                .replace("{query}", request.message)
            )
            sources_hits = []
        else:
            # --- RAG MODE ---
            inject_types = strategy.get("inject_types", [])
            prepend_instr = strategy.get("prepend_instruction", "")
            hits = await _retrieve_hits(request, retriever, inject_types)
            if not hits:
                context_str = "Keine relevanten Notizen gefunden."
            else:
                context_str = _build_enriched_context(hits)
            template = llm.prompts.get(prompt_key, "{context_str}\n\n{query}")
            if prepend_instr:
                context_str = f"{prepend_instr}\n\n{context_str}"
            final_prompt = template.replace("{context_str}", context_str).replace("{query}", request.message)
            sources_hits = hits

        # --- GENERATION ---
        system_prompt = llm.prompts.get("system_prompt", "")
        answer_text = await llm.generate_raw_response(prompt=final_prompt, system=system_prompt)
        duration_ms = int((time.perf_counter() - start_time) * 1000)

        # Feedback logging is best effort: it must never fail the request,
        # but a failure is reported instead of being swallowed silently.
        try:
            log_search(
                query_id=query_id,
                query_text=request.message,
                results=sources_hits,
                mode="interview" if intent == "INTERVIEW" else "chat_rag",
                metadata={"intent": intent, "source": intent_source}
            )
        except Exception as log_err:
            logger.warning(f"[{query_id}] Feedback logging failed: {log_err}")

        return ChatResponse(
            query_id=query_id,
            answer=answer_text,
            sources=sources_hits,
            latency_ms=duration_ms,
            intent=intent,
            intent_source=intent_source
        )
    except Exception as e:
        logger.error(f"Error in chat endpoint: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e