diff --git a/app/routers/chat.py b/app/routers/chat.py index 986c131..721a05d 100644 --- a/app/routers/chat.py +++ b/app/routers/chat.py @@ -1,9 +1,10 @@ """ FILE: app/routers/chat.py DESCRIPTION: Haupt-Chat-Interface (RAG & Interview). Enthält Intent-Router (Keywords/LLM) und Prompt-Construction. -VERSION: 2.7.1 (WP-22 Semantic Graph Routing) +VERSION: 2.7.2 (Deep Fallback Edition) STATUS: Active -FIX: Umstellung auf llm.get_prompt() zur Behebung des 500 Server Errors (Dictionary replace crash). +FIX: Respektiert preferred_provider aus decision_engine.yaml und implementiert + Deep Fallback Logik zur Vermeidung leerer Cloud-Antworten (Silent Refusal). DEPENDENCIES: app.config, app.models.dto, app.services.llm_service, app.core.retriever, app.services.feedback_service EXTERNAL_CONFIG: config/decision_engine.yaml, config/types.yaml """ @@ -36,7 +37,7 @@ def _load_decision_config() -> Dict[str, Any]: path = Path(settings.DECISION_CONFIG_PATH) default_config = { "strategies": { - "FACT": {"trigger_keywords": []} + "FACT": {"trigger_keywords": [], "preferred_provider": "openrouter"} } } @@ -159,7 +160,7 @@ def _is_question(query: str) -> bool: q = query.strip().lower() if "?" in q: return True - # W-Fragen Indikatoren (falls User das ? vergisst) + # W-Fragen Indikatoren starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du", "entspricht"] if any(q.startswith(s + " ") for s in starters): return True @@ -170,7 +171,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]: """ Hybrid Router v5: 1. Decision Keywords (Strategie) -> Prio 1 - 2. Type Keywords (Interview Trigger) -> Prio 2, ABER NUR WENN KEINE FRAGE! + 2. Type Keywords (Interview Trigger) -> Prio 2 3. LLM (Fallback) -> Prio 3 """ config = get_full_config() @@ -179,7 +180,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]: query_lower = query.lower() - # 1. FAST PATH A: Strategie Keywords (z.B. "Soll ich...") + # 1. FAST PATH A: Strategie Keywords for intent_name, strategy in strategies.items(): if intent_name == "FACT": continue keywords = strategy.get("trigger_keywords", []) @@ -187,7 +188,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]: if k.lower() in query_lower: return intent_name, "Keyword (Strategy)" - # 2. FAST PATH B: Type Keywords (z.B. "Projekt", "Werte") -> INTERVIEW + # 2. FAST PATH B: Type Keywords -> INTERVIEW if not _is_question(query_lower): types_cfg = get_types_config() types_def = types_cfg.get("types", {}) @@ -200,8 +201,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]: # 3. SLOW PATH: LLM Router if settings.get("llm_fallback_enabled", False): - # FIX: Nutze get_prompt statt direktem Zugriff auf dict - router_prompt_template = llm.get_prompt("router_prompt") + router_prompt_template = llm.get_prompt("llm_router_prompt") if router_prompt_template: prompt = router_prompt_template.replace("{query}", query) @@ -241,6 +241,7 @@ async def chat_endpoint( # Strategy Load strategy = get_decision_strategy(intent) prompt_key = strategy.get("prompt_template", "rag_template") + preferred_provider = strategy.get("preferred_provider") # Nutzt Konfiguration aus decision_engine.yaml sources_hits = [] final_prompt = "" @@ -264,7 +265,6 @@ async def chat_endpoint( logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}") fields_str = "\n- " + "\n- ".join(fields_list) - # FIX: Nutze get_prompt() zur Auflösung der provider-spezifischen Templates template = llm.get_prompt(prompt_key) final_prompt = template.replace("{context_str}", "Dialogverlauf...") \ .replace("{query}", request.message) \ @@ -274,14 +274,10 @@ async def chat_endpoint( sources_hits = [] else: - # --- RAG MODE --- + # --- RAG MODE (FACT, DECISION, EMPATHY, CODING) --- inject_types = strategy.get("inject_types", []) prepend_instr = strategy.get("prepend_instruction", "") - - # --- WP-22: Semantic Graph Routing (Teil C) --- edge_boosts = strategy.get("edge_boosts", {}) - if edge_boosts: - logger.info(f"[{query_id}] Applying Edge Boosts: {edge_boosts}") query_req = QueryRequest( query=request.message, @@ -308,33 +304,36 @@ async def chat_endpoint( if strat_hit.node_id not in existing_ids: hits.append(strat_hit) - if not hits: - context_str = "Keine relevanten Notizen gefunden." - else: - context_str = _build_enriched_context(hits) - - # FIX: Nutze get_prompt() zur Auflösung der provider-spezifischen Templates - template = llm.get_prompt(prompt_key) + context_str = _build_enriched_context(hits) if hits else "Keine relevanten Notizen gefunden." + template = llm.get_prompt(prompt_key) or "{context_str}\n\n{query}" - if not template: - template = "{context_str}\n\n{query}" - if prepend_instr: context_str = f"{prepend_instr}\n\n{context_str}" final_prompt = template.replace("{context_str}", context_str).replace("{query}", request.message) sources_hits = hits - # --- GENERATION --- + # --- GENERATION MIT DEEP FALLBACK --- system_prompt = llm.get_prompt("system_prompt") - # Chat nutzt IMMER realtime priority + # 1. Versuch mit konfiguriertem Provider (z.B. Ollama für EMPATHY) answer_text = await llm.generate_raw_response( prompt=final_prompt, system=system_prompt, - priority="realtime" + priority="realtime", + provider=preferred_provider ) + # DEEP FALLBACK: Wenn die Antwort leer ist (Silent Refusal in der Cloud) + if not answer_text.strip() and preferred_provider != "ollama": + logger.warning(f"🛑 [{query_id}] Leere Antwort von '{preferred_provider}'. Starte LOKALEN FALLBACK via Ollama...") + answer_text = await llm.generate_raw_response( + prompt=final_prompt, + system=system_prompt, + priority="realtime", + provider="ollama" + ) + duration_ms = int((time.time() - start_time) * 1000) # Logging @@ -344,7 +343,7 @@ async def chat_endpoint( query_text=request.message, results=sources_hits, mode="interview" if intent == "INTERVIEW" else "chat_rag", - metadata={"intent": intent, "source": intent_source} + metadata={"intent": intent, "source": intent_source, "provider": preferred_provider} ) except: pass