Angepasst an die neue LLM-Logik

2025-12-26 05:11:48 +01:00 · 2025-12-26 05:11:48 +01:00 · 83c0c9944d
commit 83c0c9944d
parent f6f3213b84
1 changed files with 28 additions and 29 deletions
--- a/app/routers/chat.py
+++ b/app/routers/chat.py
@ -1,9 +1,10 @@
 """
 FILE: app/routers/chat.py
 DESCRIPTION: Haupt-Chat-Interface (RAG & Interview). Enthält Intent-Router (Keywords/LLM) und Prompt-Construction.
-VERSION: 2.7.1 (WP-22 Semantic Graph Routing)
+VERSION: 2.7.2 (Deep Fallback Edition)
 STATUS: Active
-FIX: Umstellung auf llm.get_prompt() zur Behebung des 500 Server Errors (Dictionary replace crash).
+FIX: Respektiert preferred_provider aus decision_engine.yaml und implementiert 
+     Deep Fallback Logik zur Vermeidung leerer Cloud-Antworten (Silent Refusal).
 DEPENDENCIES: app.config, app.models.dto, app.services.llm_service, app.core.retriever, app.services.feedback_service
 EXTERNAL_CONFIG: config/decision_engine.yaml, config/types.yaml
 """
@ -36,7 +37,7 @@ def _load_decision_config() -> Dict[str, Any]:
    path = Path(settings.DECISION_CONFIG_PATH)
    default_config = {
        "strategies": {
-            "FACT": {"trigger_keywords": []}
+            "FACT": {"trigger_keywords": [], "preferred_provider": "openrouter"}
        }
    }
    
@ -159,7 +160,7 @@ def _is_question(query: str) -> bool:
    q = query.strip().lower()
    if "?" in q: return True
    
-    # W-Fragen Indikatoren (falls User das ? vergisst)
+    # W-Fragen Indikatoren
    starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du", "entspricht"]
    if any(q.startswith(s + " ") for s in starters):
        return True
@ -170,7 +171,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
    """
    Hybrid Router v5: 
    1. Decision Keywords (Strategie) -> Prio 1
-    2. Type Keywords (Interview Trigger) -> Prio 2, ABER NUR WENN KEINE FRAGE!
+    2. Type Keywords (Interview Trigger) -> Prio 2
    3. LLM (Fallback) -> Prio 3
    """
    config = get_full_config()
@ -179,7 +180,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
    
    query_lower = query.lower()
    
-    # 1. FAST PATH A: Strategie Keywords (z.B. "Soll ich...")
+    # 1. FAST PATH A: Strategie Keywords
    for intent_name, strategy in strategies.items():
        if intent_name == "FACT": continue
        keywords = strategy.get("trigger_keywords", [])
@ -187,7 +188,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
            if k.lower() in query_lower:
                return intent_name, "Keyword (Strategy)"
    
-    # 2. FAST PATH B: Type Keywords (z.B. "Projekt", "Werte") -> INTERVIEW
+    # 2. FAST PATH B: Type Keywords -> INTERVIEW
    if not _is_question(query_lower):
        types_cfg = get_types_config()
        types_def = types_cfg.get("types", {})
@ -200,8 +201,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:

    # 3. SLOW PATH: LLM Router
    if settings.get("llm_fallback_enabled", False):
-        # FIX: Nutze get_prompt statt direktem Zugriff auf dict
-        router_prompt_template = llm.get_prompt("router_prompt")
+        router_prompt_template = llm.get_prompt("llm_router_prompt")
        
        if router_prompt_template:
            prompt = router_prompt_template.replace("{query}", query)
@ -241,6 +241,7 @@ async def chat_endpoint(
        # Strategy Load
        strategy = get_decision_strategy(intent)
        prompt_key = strategy.get("prompt_template", "rag_template")
+        preferred_provider = strategy.get("preferred_provider") # Nutzt Konfiguration aus decision_engine.yaml
        
        sources_hits = []
        final_prompt = ""
@ -264,7 +265,6 @@ async def chat_endpoint(
            logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")
            fields_str = "\n- " + "\n- ".join(fields_list)
            
-            # FIX: Nutze get_prompt() zur Auflösung der provider-spezifischen Templates
            template = llm.get_prompt(prompt_key)
            final_prompt = template.replace("{context_str}", "Dialogverlauf...") \
                                   .replace("{query}", request.message) \
@ -274,14 +274,10 @@ async def chat_endpoint(
            sources_hits = []
            
        else:
-            # --- RAG MODE ---
+            # --- RAG MODE (FACT, DECISION, EMPATHY, CODING) ---
            inject_types = strategy.get("inject_types", [])
            prepend_instr = strategy.get("prepend_instruction", "")
-            
-            # --- WP-22: Semantic Graph Routing (Teil C) ---
            edge_boosts = strategy.get("edge_boosts", {})
-            if edge_boosts:
-                logger.info(f"[{query_id}] Applying Edge Boosts: {edge_boosts}")

            query_req = QueryRequest(
                query=request.message,
@ -308,16 +304,8 @@ async def chat_endpoint(
                    if strat_hit.node_id not in existing_ids:
                        hits.append(strat_hit)

-            if not hits:
-                context_str = "Keine relevanten Notizen gefunden."
-            else:
-                context_str = _build_enriched_context(hits)
-
-            # FIX: Nutze get_prompt() zur Auflösung der provider-spezifischen Templates
-            template = llm.get_prompt(prompt_key)
-            
-            if not template:
-                 template = "{context_str}\n\n{query}"
+            context_str = _build_enriched_context(hits) if hits else "Keine relevanten Notizen gefunden."
+            template = llm.get_prompt(prompt_key) or "{context_str}\n\n{query}"
            
            if prepend_instr:
                 context_str = f"{prepend_instr}\n\n{context_str}"
@ -325,14 +313,25 @@ async def chat_endpoint(
            final_prompt = template.replace("{context_str}", context_str).replace("{query}", request.message)
            sources_hits = hits
        
-        # --- GENERATION ---
+        # --- GENERATION MIT DEEP FALLBACK ---
        system_prompt = llm.get_prompt("system_prompt")
        
-        # Chat nutzt IMMER realtime priority
+        # 1. Versuch mit konfiguriertem Provider (z.B. Ollama für EMPATHY)
        answer_text = await llm.generate_raw_response(
            prompt=final_prompt, 
            system=system_prompt,
-            priority="realtime"
+            priority="realtime",
+            provider=preferred_provider
+        )
+
+        # DEEP FALLBACK: Wenn die Antwort leer ist (Silent Refusal in der Cloud)
+        if not answer_text.strip() and preferred_provider != "ollama":
+            logger.warning(f"🛑 [{query_id}] Leere Antwort von '{preferred_provider}'. Starte LOKALEN FALLBACK via Ollama...")
+            answer_text = await llm.generate_raw_response(
+                prompt=final_prompt, 
+                system=system_prompt,
+                priority="realtime",
+                provider="ollama"
            )

        duration_ms = int((time.time() - start_time) * 1000)
@ -344,7 +343,7 @@ async def chat_endpoint(
                query_text=request.message,
                results=sources_hits,
                mode="interview" if intent == "INTERVIEW" else "chat_rag",
-                metadata={"intent": intent, "source": intent_source}
+                metadata={"intent": intent, "source": intent_source, "provider": preferred_provider}
            )
        except: pass