"""
FILE: app/routers/chat.py

DESCRIPTION: Main chat interface (RAG & interview). Contains the intent
router (keywords/LLM) and the prompt construction.

VERSION: 2.7.8 (Full Unabridged Stability Edition)
STATUS: Active

FIX:
1. Implements context throttling for Ollama (MAX_OLLAMA_CHARS).
2. Disables LLM retries for the chat (max_retries=0).
3. Fixes double-fallback loops and silent refusals.
"""
|
|
|
|
import logging
import os
import time
import uuid
from pathlib import Path
from typing import Any, Dict, List, Optional

import yaml
from fastapi import APIRouter, Depends, HTTPException

from app.config import get_settings
from app.core.retriever import Retriever
from app.models.dto import ChatRequest, ChatResponse, QueryHit, QueryRequest
from app.services.feedback_service import log_search
from app.services.llm_service import LLMService

router = APIRouter()
logger = logging.getLogger(__name__)

# --- Helper: Config Loader ---

# Module-level caches: each YAML config is parsed at most once per process.
_DECISION_CONFIG_CACHE = None
_TYPES_CONFIG_CACHE = None
|
|
|
|
def _load_decision_config() -> Dict[str, Any]:
    """Load the decision-strategy YAML referenced by settings.DECISION_CONFIG_PATH.

    Returns:
        The parsed config dict. Falls back to a minimal default (a single
        FACT strategy) when the file is missing, unreadable, or empty, so
        callers can always rely on a ``"strategies"`` mapping being present.
    """
    settings = get_settings()
    path = Path(settings.DECISION_CONFIG_PATH)

    default_config = {
        "strategies": {
            "FACT": {"trigger_keywords": [], "preferred_provider": "openrouter"}
        }
    }

    if not path.exists():
        logger.warning(f"Decision config not found at {path}, using defaults.")
        return default_config

    try:
        with open(path, "r", encoding="utf-8") as f:
            # FIX: an empty YAML file parses to None; fall back to the
            # defaults so callers never receive a non-dict config.
            return yaml.safe_load(f) or default_config
    except Exception as e:
        logger.error(f"Failed to load decision config: {e}")
        return default_config
|
|
|
|
def _load_types_config() -> Dict[str, Any]:
|
|
"""Lädt die types.yaml für Keyword-Erkennung."""
|
|
path = os.getenv("MINDNET_TYPES_FILE", "config/types.yaml")
|
|
try:
|
|
with open(path, "r", encoding="utf-8") as f:
|
|
return yaml.safe_load(f) or {}
|
|
except Exception:
|
|
return {}
|
|
|
|
def get_full_config() -> Dict[str, Any]:
    """Lazily load and memoize the decision configuration."""
    global _DECISION_CONFIG_CACHE
    cached = _DECISION_CONFIG_CACHE
    if cached is None:
        cached = _load_decision_config()
        _DECISION_CONFIG_CACHE = cached
    return cached
|
|
|
|
def get_types_config() -> Dict[str, Any]:
    """Lazily load and memoize the types.yaml configuration."""
    global _TYPES_CONFIG_CACHE
    cached = _TYPES_CONFIG_CACHE
    if cached is None:
        cached = _load_types_config()
        _TYPES_CONFIG_CACHE = cached
    return cached
|
|
|
|
def get_decision_strategy(intent: str) -> Dict[str, Any]:
    """Resolve the strategy block for *intent*; fall back to FACT, then {}."""
    strategies = get_full_config().get("strategies", {})
    if intent in strategies:
        return strategies[intent]
    return strategies.get("FACT", {})
|
|
|
|
# --- Helper: Target Type Detection (WP-07) ---
|
|
|
|
def _detect_target_type(message: str, configured_schemas: Dict[str, Any]) -> str:
    """
    Guess which note type the user wants to create.

    Checks, in priority order: detection_keywords from types.yaml, direct
    schema-key mentions, and a legacy German-synonym mapping. Falls back
    to "default" when nothing matches.
    """
    text = message.lower()

    # 1. detection_keywords from types.yaml take priority.
    for type_name, type_data in get_types_config().get("types", {}).items():
        if any(kw.lower() in text for kw in type_data.get("detection_keywords", [])):
            return type_name

    # 2. A configured schema key literally mentioned in the message.
    for schema_key in configured_schemas:
        if schema_key != "default" and schema_key in text:
            return schema_key

    # 3. Legacy German synonym mapping (fallback).
    legacy_synonyms = {
        "projekt": "project", "vorhaben": "project",
        "entscheidung": "decision", "beschluss": "decision",
        "ziel": "goal",
        "erfahrung": "experience", "lektion": "experience",
        "wert": "value",
        "prinzip": "principle",
        "notiz": "default", "idee": "default",
    }
    for term, schema_key in legacy_synonyms.items():
        if term in text:
            return schema_key

    return "default"
|
|
|
|
# --- Dependencies ---
|
|
|
|
def get_llm_service():
    """FastAPI dependency: provide a fresh LLMService per request."""
    service = LLMService()
    return service
|
|
|
|
def get_retriever():
    """FastAPI dependency: provide a fresh Retriever per request."""
    instance = Retriever()
    return instance
|
|
|
|
|
|
# --- Logic ---
|
|
|
|
def _build_enriched_context(hits: List[QueryHit]) -> str:
    """Render retrieval hits as numbered, typed source sections for the prompt."""
    sections = []
    for idx, hit in enumerate(hits, start=1):
        src = hit.source or {}
        # First non-empty text field wins; payload schemas vary per source.
        body = (
            src.get("text")
            or src.get("content")
            or src.get("page_content")
            or src.get("chunk_text")
            or "[Kein Text]"
        )
        heading = hit.note_id or "Unbekannt"

        meta = hit.payload or {}
        kind = str(meta.get("type") or src.get("type", "unknown")).upper()

        sections.append(
            f"### QUELLE {idx}: {heading}\n"
            f"TYP: [{kind}] (Score: {hit.total_score:.2f})\n"
            f"INHALT:\n{body}\n"
        )

    return "\n\n".join(sections)
|
|
|
|
def _is_question(query: str) -> bool:
|
|
"""Prüft, ob der Input wahrscheinlich eine Frage ist."""
|
|
q = query.strip().lower()
|
|
if "?" in q: return True
|
|
|
|
# W-Fragen Indikatoren
|
|
starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du", "entspricht"]
|
|
if any(q.startswith(s + " ") for s in starters):
|
|
return True
|
|
|
|
return False
|
|
|
|
async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
    """
    Hybrid intent router (v5). Resolution order:
      1. strategy trigger_keywords from the decision config
      2. type detection_keywords -> INTERVIEW (only for non-questions)
      3. optional LLM fallback router
      4. default: FACT

    Returns (intent, human-readable source of the decision).
    """
    config = get_full_config()
    strategies = config.get("strategies", {})
    router_settings = config.get("settings", {})

    text = query.lower()

    # 1. Fast path A: keywords attached to a strategy (FACT is the default
    #    intent and deliberately has no trigger keywords of its own).
    for intent_name, strategy in strategies.items():
        if intent_name == "FACT":
            continue
        if any(kw.lower() in text for kw in strategy.get("trigger_keywords", [])):
            return intent_name, "Keyword (Strategy)"

    # 2. Fast path B: note-type keywords trigger the interview flow, but
    #    only when the input does not look like a question.
    if not _is_question(text):
        for type_name, type_data in get_types_config().get("types", {}).items():
            if any(kw.lower() in text for kw in type_data.get("detection_keywords", [])):
                return "INTERVIEW", f"Keyword (Type: {type_name})"

    # 3. Slow path: ask the LLM (only if enabled and a router prompt exists).
    if router_settings.get("llm_fallback_enabled", False):
        template = llm.get_prompt("llm_router_prompt")
        if template:
            prompt = template.replace("{query}", query)
            logger.info("Keywords failed (or Question detected). Asking LLM for Intent...")
            try:
                # No retries during routing either, to keep chat latency bounded.
                raw = await llm.generate_raw_response(prompt, priority="realtime", max_retries=0)
                verdict = raw.upper()

                if "INTERVIEW" in verdict or "CREATE" in verdict:
                    return "INTERVIEW", "LLM Router"

                for strategy_key in strategies:
                    if strategy_key in verdict:
                        return strategy_key, "LLM Router"
            except Exception as e:
                logger.error(f"Router LLM failed: {e}")

    return "FACT", "Default (No Match)"
|
|
|
|
@router.post("/", response_model=ChatResponse)
async def chat_endpoint(
    request: ChatRequest,
    llm: LLMService = Depends(get_llm_service),
    retriever: Retriever = Depends(get_retriever)
):
    """
    Main chat endpoint.

    Classifies the intent, then either runs the interview flow (note
    creation) or a RAG flow (retrieval + generation). The primary LLM call
    runs without retries; an empty or failed answer triggers a deep
    fallback to Ollama.

    Raises:
        HTTPException: 500 with a user-friendly message on any unhandled error.
    """
    start_time = time.time()
    query_id = str(uuid.uuid4())
    logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")

    try:
        # 1. Intent detection (keyword fast paths, then optional LLM router).
        intent, intent_source = await _classify_intent(request.message, llm)
        logger.info(f"[{query_id}] Final Intent: {intent} via {intent_source}")

        # Load the strategy for this intent (prompt template, provider, boosts).
        strategy = get_decision_strategy(intent)
        prompt_key = strategy.get("prompt_template", "rag_template")
        preferred_provider = strategy.get("preferred_provider")

        sources_hits = []
        final_prompt = ""
        context_str = ""

        if intent == "INTERVIEW":
            # --- INTERVIEW MODE ---
            target_type = _detect_target_type(request.message, strategy.get("schemas", {}))

            types_cfg = get_types_config()
            type_def = types_cfg.get("types", {}).get(target_type, {})
            fields_list = type_def.get("schema", [])

            if not fields_list:
                # Fall back to schemas configured on the strategy itself.
                configured_schemas = strategy.get("schemas", {})
                fallback_schema = configured_schemas.get(target_type, configured_schemas.get("default"))
                if isinstance(fallback_schema, dict):
                    fields_list = fallback_schema.get("fields", [])
                else:
                    fields_list = fallback_schema or []

            logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")
            # FIX: avoid emitting a dangling "- " bullet when no fields are configured.
            fields_str = "\n- " + "\n- ".join(fields_list) if fields_list else ""

            # FIX: get_prompt() may return None (missing template); fall back to a
            # minimal template instead of crashing on .replace() — mirrors RAG mode.
            template = llm.get_prompt(prompt_key) or "{context_str}\n\n{query}"
            final_prompt = template.replace("{context_str}", "Dialogverlauf...") \
                                   .replace("{query}", request.message) \
                                   .replace("{target_type}", target_type) \
                                   .replace("{schema_fields}", fields_str) \
                                   .replace("{schema_hint}", "")
            sources_hits = []

        else:
            # --- RAG MODE (FACT, DECISION, EMPATHY, CODING) ---
            inject_types = strategy.get("inject_types", [])
            prepend_instr = strategy.get("prepend_instruction", "")
            edge_boosts = strategy.get("edge_boosts", {})

            query_req = QueryRequest(
                query=request.message,
                mode="hybrid",
                top_k=request.top_k,
                explain=request.explain,
                boost_edges=edge_boosts
            )
            retrieve_result = await retriever.search(query_req)
            hits = retrieve_result.results

            if inject_types:
                # Second, type-filtered search so strategy-relevant note
                # types are guaranteed to appear in the context.
                strategy_req = QueryRequest(
                    query=request.message,
                    mode="hybrid",
                    top_k=3,
                    filters={"type": inject_types},
                    explain=False,
                    boost_edges=edge_boosts
                )
                strategy_result = await retriever.search(strategy_req)
                existing_ids = {h.node_id for h in hits}
                for strat_hit in strategy_result.results:
                    if strat_hit.node_id not in existing_ids:
                        hits.append(strat_hit)

            context_str = _build_enriched_context(hits) if hits else "Keine relevanten Notizen gefunden."

            # --- STABILITY FIX: OLLAMA CONTEXT THROTTLE ---
            # Caps the context to avoid Ollama's "decode: cannot decode batches" error.
            settings = get_settings()
            max_chars = getattr(settings, "MAX_OLLAMA_CHARS", 10000)
            if preferred_provider == "ollama" and len(context_str) > max_chars:
                logger.warning(f"⚠️ [{query_id}] Context zu groß für Ollama ({len(context_str)} chars). Kürze auf {max_chars}.")
                context_str = context_str[:max_chars] + "\n[...gekürzt zur Stabilität...]"

            template = llm.get_prompt(prompt_key) or "{context_str}\n\n{query}"

            if prepend_instr:
                context_str = f"{prepend_instr}\n\n{context_str}"

            final_prompt = template.replace("{context_str}", context_str).replace("{query}", request.message)
            sources_hits = hits

        # --- DEBUG SPOT 1: PROMPT CONSTRUCTION ---
        logger.info(f"[{query_id}] PROMPT CONSTRUCTION COMPLETE. Length: {len(final_prompt)} chars.")
        if not final_prompt.strip():
            logger.error(f"[{query_id}] CRITICAL: Final prompt is empty before sending to LLM!")

        # --- GENERATION WITH NO-RETRY & DEEP FALLBACK ---
        system_prompt = llm.get_prompt("system_prompt")

        # --- DEBUG SPOT 2: PRIMARY CALL ---
        logger.info(f"[{query_id}] PRIMARY CALL: Sending request to provider '{preferred_provider}' (No Retries)...")

        answer_text = ""
        try:
            # max_retries=0 prevents retry cascades from stalling the chat.
            answer_text = await llm.generate_raw_response(
                prompt=final_prompt,
                system=system_prompt,
                priority="realtime",
                provider=preferred_provider,
                max_retries=0
            )
        except Exception as e:
            logger.error(f"🛑 [{query_id}] Primary Provider '{preferred_provider}' failed: {e}")

        # DEEP FALLBACK: primary crashed or returned empty (silent refusal).
        if not answer_text.strip() and preferred_provider != "ollama":
            # --- DEBUG SPOT 3: FALLBACK TRIGGER ---
            logger.warning(f"🛑 [{query_id}] PRIMARY '{preferred_provider}' returned EMPTY or FAILED. Triggering Deep Fallback to Ollama...")

            try:
                answer_text = await llm.generate_raw_response(
                    prompt=final_prompt,
                    system=system_prompt,
                    priority="realtime",
                    provider="ollama",
                    max_retries=0
                )
            except Exception as e:
                logger.error(f"🛑 [{query_id}] Deep Fallback to Ollama also failed: {e}")
                answer_text = "Entschuldigung, das System ist aktuell überlastet. Bitte versuche es in einem Moment erneut."

        duration_ms = int((time.time() - start_time) * 1000)

        # Best-effort search logging; must never break the response.
        try:
            log_search(
                query_id=query_id,
                query_text=request.message,
                results=sources_hits,
                mode="interview" if intent == "INTERVIEW" else "chat_rag",
                metadata={"intent": intent, "source": intent_source, "provider": preferred_provider}
            )
        except Exception as e:
            # FIX: no bare "except: pass" — record the failure instead of
            # silently swallowing every exception (including SystemExit).
            logger.warning(f"[{query_id}] Failed to log search: {e}")

        return ChatResponse(
            query_id=query_id,
            answer=answer_text,
            sources=sources_hits,
            latency_ms=duration_ms,
            intent=intent,
            intent_source=intent_source
        )

    except Exception as e:
        logger.error(f"Error in chat endpoint: {e}", exc_info=True)
        # Return a user-friendly message instead of the raw error stack.
        raise HTTPException(status_code=500, detail="Das System konnte die Anfrage nicht verarbeiten.")