"""
FILE: app/routers/chat.py
DESCRIPTION: Main chat interface (RAG & interview). Contains the intent router
             (keywords/LLM) and the prompt construction.
VERSION: 2.7.8 (Full Unabridged Stability Edition)
STATUS: Active
FIX:
1. Implements context throttling for Ollama (MAX_OLLAMA_CHARS).
2. Disables LLM retries for the chat (max_retries=0).
3. Fixes double-fallback loops and silent refusals.
"""

from fastapi import APIRouter, HTTPException, Depends
from typing import List, Dict, Any, Optional
import time
import uuid
import logging
import yaml
import os
from pathlib import Path

from app.config import get_settings
from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
from app.services.llm_service import LLMService
from app.core.retrieval.retriever import Retriever
from app.services.feedback_service import log_search

router = APIRouter()
logger = logging.getLogger(__name__)

# Template used whenever the prompt store has no entry for the requested key.
_DEFAULT_PROMPT_TEMPLATE = "{context_str}\n\n{query}"

# User-facing answer when no provider produced any text.
_OVERLOAD_MESSAGE = (
    "Entschuldigung, das System ist aktuell überlastet. "
    "Bitte versuche es in einem Moment erneut."
)

# Marker appended to a context that was cut down for Ollama.
_TRUNCATION_MARKER = "\n[...gekürzt zur Stabilität...]"

# Prefixes (already followed by a space) that mark an input as a question.
_QUESTION_PREFIXES = tuple(
    starter + " "
    for starter in (
        "wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu",
        "welche", "bist du", "entspricht",
    )
)

# Legacy mapping from German terms to schema keys (checked in this order).
_TYPE_SYNONYMS = {
    "projekt": "project",
    "vorhaben": "project",
    "entscheidung": "decision",
    "beschluss": "decision",
    "ziel": "goal",
    "erfahrung": "experience",
    "lektion": "experience",
    "wert": "value",
    "prinzip": "principle",
    "notiz": "default",
    "idee": "default",
}

# --- Helper: Config Loader ---

_DECISION_CONFIG_CACHE = None
_TYPES_CONFIG_CACHE = None


def _load_decision_config() -> Dict[str, Any]:
    """Load the decision config from ``settings.DECISION_CONFIG_PATH``.

    Returns:
        The parsed YAML mapping. A built-in default (a single ``FACT``
        strategy) is returned when the file is missing, cannot be read or
        parsed, or does not contain a mapping (e.g. an empty file, for which
        ``yaml.safe_load`` yields ``None``).
    """
    settings = get_settings()
    path = Path(settings.DECISION_CONFIG_PATH)
    default_config = {
        "strategies": {
            "FACT": {"trigger_keywords": [], "preferred_provider": "openrouter"}
        }
    }

    if not path.exists():
        logger.warning(f"Decision config not found at {path}, using defaults.")
        return default_config

    try:
        with open(path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
    except Exception as e:
        logger.error(f"Failed to load decision config: {e}")
        return default_config

    # Callers use ``.get`` on the result, so anything but a mapping is unusable.
    if not isinstance(loaded, dict):
        logger.warning(f"Decision config at {path} is empty or not a mapping, using defaults.")
        return default_config
    return loaded


def _load_types_config() -> Dict[str, Any]:
    """Load the types YAML used for keyword detection.

    The path is taken from the ``MINDNET_TYPES_FILE`` environment variable and
    defaults to ``config/types.yaml``.

    Returns:
        The parsed mapping, or an empty dict when the file cannot be read or
        parsed or does not contain a mapping.
    """
    path = os.getenv("MINDNET_TYPES_FILE", "config/types.yaml")
    try:
        with open(path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
    except Exception as e:
        # Best effort: type keywords are optional, but the failure should be visible.
        logger.warning(f"Failed to load types config from {path}: {e}")
        return {}
    return loaded if isinstance(loaded, dict) else {}


def get_full_config() -> Dict[str, Any]:
    """Return the decision config, loading it on first use and caching it."""
    global _DECISION_CONFIG_CACHE
    if _DECISION_CONFIG_CACHE is None:
        _DECISION_CONFIG_CACHE = _load_decision_config()
    return _DECISION_CONFIG_CACHE


def get_types_config() -> Dict[str, Any]:
    """Return the types config, loading it on first use and caching it."""
    global _TYPES_CONFIG_CACHE
    if _TYPES_CONFIG_CACHE is None:
        _TYPES_CONFIG_CACHE = _load_types_config()
    return _TYPES_CONFIG_CACHE


def get_decision_strategy(intent: str) -> Dict[str, Any]:
    """Return the strategy for ``intent``, falling back to ``FACT``, then ``{}``."""
    strategies = get_full_config().get("strategies") or {}
    return strategies.get(intent, strategies.get("FACT", {}))


def _type_keyword_pairs() -> List[tuple[str, str]]:
    """Collect ``(type_name, lowercased_keyword)`` pairs from the types config.

    Entries that are not mappings, missing/null keyword lists and empty
    keywords are skipped; an empty keyword would otherwise match every message.
    """
    types_def = get_types_config().get("types")
    if not isinstance(types_def, dict):
        return []

    pairs = []
    for type_name, type_data in types_def.items():
        if not isinstance(type_data, dict):
            continue
        for kw in type_data.get("detection_keywords") or []:
            if not kw:
                continue
            pairs.append((type_name, str(kw).lower()))
    return pairs


# --- Helper: Target Type Detection (WP-07) ---

def _detect_target_type(message: str, configured_schemas: Dict[str, Any]) -> str:
    """Guess which note type the user wants to create.

    Checks, in order: the ``detection_keywords`` from the types config, the
    keys of ``configured_schemas`` (except ``"default"``), and the legacy
    synonym mapping. All matching is case-insensitive substring matching.

    Args:
        message: The raw user message.
        configured_schemas: Schema mapping of the active strategy; may be empty.

    Returns:
        The detected type name or schema key, or ``"default"`` if nothing matched.
    """
    message_lower = message.lower()

    # 1. Check detection_keywords from the types config (priority!)
    for type_name, keyword in _type_keyword_pairs():
        if keyword in message_lower:
            return type_name

    # 2. Direct match against the schema keys
    for type_key in configured_schemas or {}:
        if type_key == "default":
            continue
        # Lowercase the key as well: the message was lowercased above.
        if str(type_key).lower() in message_lower:
            return type_key

    # 3. Synonym mapping (legacy fallback)
    for term, schema_key in _TYPE_SYNONYMS.items():
        if term in message_lower:
            return schema_key

    return "default"


# --- Dependencies ---

def get_llm_service():
    """FastAPI dependency: create a new ``LLMService``."""
    return LLMService()


def get_retriever():
    """FastAPI dependency: create a new ``Retriever``."""
    return Retriever()


# --- Logic ---

def _build_enriched_context(hits: List[QueryHit]) -> str:
    """Render retrieval hits as numbered source sections for the prompt.

    For each hit the text is taken from the first non-empty of the source keys
    ``text``, ``content``, ``page_content`` and ``chunk_text``; the type comes
    from the payload or, failing that, the source.

    Returns:
        The sections joined by blank lines (empty string for no hits).
    """
    context_parts = []
    for i, hit in enumerate(hits, 1):
        source = hit.source or {}
        content = (
            source.get("text")
            or source.get("content")
            or source.get("page_content")
            or source.get("chunk_text")
            or "[Kein Text]"
        )
        title = hit.note_id or "Unbekannt"

        payload = hit.payload or {}
        note_type = payload.get("type") or source.get("type", "unknown")
        note_type = str(note_type).upper()

        entry = (
            f"### QUELLE {i}: {title}\n"
            f"TYP: [{note_type}] (Score: {hit.total_score:.2f})\n"
            f"INHALT:\n{content}\n"
        )
        context_parts.append(entry)

    return "\n\n".join(context_parts)


def _is_question(query: str) -> bool:
    """Check whether the input is probably a question.

    True if it contains a ``?`` or starts with one of the German question
    starters followed by a space.
    """
    q = query.strip().lower()
    return "?" in q or q.startswith(_QUESTION_PREFIXES)


async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
    """Hybrid router v5.

    1. Decision keywords (strategy)      -> prio 1
    2. Type keywords (interview trigger) -> prio 2, skipped for questions
    3. LLM (fallback)                    -> prio 3, only if
       ``settings.llm_fallback_enabled`` is set in the decision config

    Returns:
        ``(intent, source)`` where ``source`` describes which stage decided.
        Defaults to ``("FACT", "Default (No Match)")``.
    """
    config = get_full_config()
    strategies = config.get("strategies") or {}
    settings = config.get("settings") or {}

    query_lower = query.lower()

    # 1. FAST PATH A: strategy keywords
    for intent_name, strategy in strategies.items():
        if intent_name == "FACT" or not isinstance(strategy, dict):
            continue
        for k in strategy.get("trigger_keywords") or []:
            # Skip empty keywords: "" is a substring of every query.
            if k and str(k).lower() in query_lower:
                return intent_name, "Keyword (Strategy)"

    # 2. FAST PATH B: type keywords -> INTERVIEW
    if not _is_question(query_lower):
        for type_name, keyword in _type_keyword_pairs():
            if keyword in query_lower:
                return "INTERVIEW", f"Keyword (Type: {type_name})"

    # 3. SLOW PATH: LLM router
    if settings.get("llm_fallback_enabled", False):
        router_prompt_template = llm.get_prompt("llm_router_prompt")

        if router_prompt_template:
            prompt = router_prompt_template.replace("{query}", query)
            logger.info("Keywords failed (or Question detected). Asking LLM for Intent...")

            try:
                # FIX: no retries in the chat flow, not even for routing.
                raw_response = await llm.generate_raw_response(
                    prompt, priority="realtime", max_retries=0
                )
                llm_output_upper = (raw_response or "").upper()

                if "INTERVIEW" in llm_output_upper or "CREATE" in llm_output_upper:
                    return "INTERVIEW", "LLM Router"

                for strat_key in strategies:
                    # The LLM output was uppercased, so compare uppercased keys.
                    if str(strat_key).upper() in llm_output_upper:
                        return strat_key, "LLM Router"

            except Exception as e:
                logger.error(f"Router LLM failed: {e}")

    return "FACT", "Default (No Match)"


def _resolve_interview_fields(target_type: str, strategy: Dict[str, Any]) -> List[Any]:
    """Return the schema fields to ask for in interview mode.

    Prefers the ``schema`` of ``target_type`` in the types config and falls
    back to the strategy's ``schemas`` entry for the type (or ``default``),
    which may be a list or a mapping with a ``fields`` list.
    """
    types_def = get_types_config().get("types")
    type_def = types_def.get(target_type) if isinstance(types_def, dict) else None
    fields_list = type_def.get("schema") if isinstance(type_def, dict) else None
    if fields_list:
        return fields_list

    configured_schemas = strategy.get("schemas") or {}
    fallback_schema = configured_schemas.get(target_type, configured_schemas.get("default"))
    if isinstance(fallback_schema, dict):
        return fallback_schema.get("fields") or []
    return fallback_schema or []


def _truncate_context(context_str: str, max_chars: int) -> str:
    """Cut ``context_str`` to ``max_chars`` and append the truncation marker."""
    if len(context_str) <= max_chars:
        return context_str
    return context_str[:max_chars] + _TRUNCATION_MARKER


def _render_rag_prompt(template: str, context_str: str, prepend_instr: str, query: str) -> str:
    """Fill the RAG template, putting ``prepend_instr`` in front of the context."""
    if prepend_instr:
        context_str = f"{prepend_instr}\n\n{context_str}"
    return template.replace("{context_str}", context_str).replace("{query}", query)


@router.post("/", response_model=ChatResponse)
async def chat_endpoint(
    request: ChatRequest,
    llm: LLMService = Depends(get_llm_service),
    retriever: Retriever = Depends(get_retriever)
):
    """Answer a chat message in interview or RAG mode.

    Flow: classify the intent, build the prompt (interview schema prompt or
    retrieval context), call the strategy's preferred provider without
    retries, and fall back to Ollama if that produced no text. If no provider
    produced text, a fixed overload message is returned as the answer.

    Raises:
        HTTPException: 500 with a generic message on any unexpected error.
    """
    start_time = time.perf_counter()
    query_id = str(uuid.uuid4())
    logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")

    try:
        # 1. Intent detection
        intent, intent_source = await _classify_intent(request.message, llm)
        logger.info(f"[{query_id}] Final Intent: {intent} via {intent_source}")

        # Strategy load
        strategy = get_decision_strategy(intent)
        prompt_key = strategy.get("prompt_template", "rag_template")
        preferred_provider = strategy.get("preferred_provider")

        sources_hits = []
        final_prompt = ""
        # Prompt for the Ollama fallback; only set when it must differ from
        # final_prompt because the context exceeds the Ollama limit.
        fallback_prompt = None

        if intent == "INTERVIEW":
            # --- INTERVIEW MODE ---
            target_type = _detect_target_type(request.message, strategy.get("schemas") or {})
            fields_list = _resolve_interview_fields(target_type, strategy)

            logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")
            fields_str = "\n- " + "\n- ".join(str(field) for field in fields_list)

            # Same default as RAG mode so a missing prompt does not crash the request.
            template = llm.get_prompt(prompt_key) or _DEFAULT_PROMPT_TEMPLATE
            final_prompt = (
                template.replace("{context_str}", "Dialogverlauf...")
                .replace("{query}", request.message)
                .replace("{target_type}", target_type)
                .replace("{schema_fields}", fields_str)
                .replace("{schema_hint}", "")
            )
            sources_hits = []

        else:
            # --- RAG MODE (FACT, DECISION, EMPATHY, CODING) ---
            inject_types = strategy.get("inject_types", [])
            prepend_instr = strategy.get("prepend_instruction", "")
            edge_boosts = strategy.get("edge_boosts", {})

            query_req = QueryRequest(
                query=request.message,
                mode="hybrid",
                top_k=request.top_k,
                explain=request.explain,
                boost_edges=edge_boosts
            )
            retrieve_result = await retriever.search(query_req)
            # Copy so that appending injected hits does not mutate the result object.
            hits = list(retrieve_result.results or [])

            if inject_types:
                strategy_req = QueryRequest(
                    query=request.message,
                    mode="hybrid",
                    top_k=3,
                    filters={"type": inject_types},
                    explain=False,
                    boost_edges=edge_boosts
                )
                strategy_result = await retriever.search(strategy_req)
                existing_ids = {h.node_id for h in hits}
                for strat_hit in strategy_result.results or []:
                    if strat_hit.node_id not in existing_ids:
                        hits.append(strat_hit)
                        existing_ids.add(strat_hit.node_id)

            context_str = _build_enriched_context(hits) if hits else "Keine relevanten Notizen gefunden."
            template = llm.get_prompt(prompt_key) or _DEFAULT_PROMPT_TEMPLATE

            # --- STABILITY FIX: OLLAMA CONTEXT THROTTLE ---
            # Limits the text to avoid the "decode: cannot decode batches" error.
            max_chars = getattr(get_settings(), "MAX_OLLAMA_CHARS", 10000)
            if len(context_str) > max_chars:
                if preferred_provider == "ollama":
                    logger.warning(f"⚠️ [{query_id}] Context zu groß für Ollama ({len(context_str)} chars). Kürze auf {max_chars}.")
                    context_str = _truncate_context(context_str, max_chars)
                else:
                    # The primary provider gets the full context, but a later
                    # fallback to Ollama must respect the same limit.
                    fallback_prompt = _render_rag_prompt(
                        template,
                        _truncate_context(context_str, max_chars),
                        prepend_instr,
                        request.message,
                    )

            final_prompt = _render_rag_prompt(template, context_str, prepend_instr, request.message)
            sources_hits = hits

        # --- DEBUG SPOT 1: PROMPT CONSTRUCTION ---
        logger.info(f"[{query_id}] PROMPT CONSTRUCTION COMPLETE. Length: {len(final_prompt)} chars.")
        if not final_prompt.strip():
            logger.error(f"[{query_id}] CRITICAL: Final prompt is empty before sending to LLM!")

        # --- GENERATION WITH NO-RETRY & DEEP FALLBACK ---
        system_prompt = llm.get_prompt("system_prompt")

        # --- DEBUG SPOT 2: PRIMARY CALL ---
        logger.info(f"[{query_id}] PRIMARY CALL: Sending request to provider '{preferred_provider}' (No Retries)...")

        answer_text = ""
        try:
            # FIX: max_retries=0 prevents hangs caused by retry cascades in the chat.
            primary_answer = await llm.generate_raw_response(
                prompt=final_prompt,
                system=system_prompt,
                priority="realtime",
                provider=preferred_provider,
                max_retries=0
            )
            answer_text = primary_answer or ""
        except Exception as e:
            logger.error(f"🛑 [{query_id}] Primary Provider '{preferred_provider}' failed: {e}")

        # DEEP FALLBACK: the answer is empty (silent refusal) or the primary crashed.
        if not answer_text.strip() and preferred_provider != "ollama":
            # --- DEBUG SPOT 3: FALLBACK TRIGGER ---
            logger.warning(f"🛑 [{query_id}] PRIMARY '{preferred_provider}' returned EMPTY or FAILED. Triggering Deep Fallback to Ollama...")
            if fallback_prompt is not None:
                logger.warning(f"[{query_id}] Fallback prompt uses a context truncated for Ollama.")
            try:
                fallback_answer = await llm.generate_raw_response(
                    prompt=fallback_prompt or final_prompt,
                    system=system_prompt,
                    priority="realtime",
                    provider="ollama",
                    max_retries=0
                )
                answer_text = fallback_answer or ""
            except Exception as e:
                logger.error(f"🛑 [{query_id}] Deep Fallback to Ollama also failed: {e}")

        # Never return an empty answer: this also covers Ollama being the
        # primary provider (no fallback available) and an empty fallback reply.
        if not answer_text.strip():
            answer_text = _OVERLOAD_MESSAGE

        duration_ms = int((time.perf_counter() - start_time) * 1000)

        # Logging (best effort: a feedback-log failure must not fail the request)
        try:
            log_search(
                query_id=query_id,
                query_text=request.message,
                results=sources_hits,
                mode="interview" if intent == "INTERVIEW" else "chat_rag",
                metadata={"intent": intent, "source": intent_source, "provider": preferred_provider}
            )
        except Exception as e:
            logger.warning(f"[{query_id}] Failed to log search: {e}")

        return ChatResponse(
            query_id=query_id,
            answer=answer_text,
            sources=sources_hits,
            latency_ms=duration_ms,
            intent=intent,
            intent_source=intent_source
        )

    except Exception as e:
        logger.error(f"Error in chat endpoint: {e}", exc_info=True)
        # Return a user-friendly message instead of just the error stack.
        raise HTTPException(status_code=500, detail="Das System konnte die Anfrage nicht verarbeiten.") from e