diff --git a/app/routers/chat.py b/app/routers/chat.py
index 986c131..721a05d 100644
--- a/app/routers/chat.py
+++ b/app/routers/chat.py
@@ -1,9 +1,10 @@
 """
 FILE: app/routers/chat.py
 DESCRIPTION: Haupt-Chat-Interface (RAG & Interview). Enthält Intent-Router (Keywords/LLM) und Prompt-Construction.
-VERSION: 2.7.1 (WP-22 Semantic Graph Routing)
+VERSION: 2.7.2 (Deep Fallback Edition)
 STATUS: Active
-FIX: Umstellung auf llm.get_prompt() zur Behebung des 500 Server Errors (Dictionary replace crash).
+FIX: Respektiert preferred_provider aus decision_engine.yaml und implementiert 
+     Deep Fallback Logik zur Vermeidung leerer Cloud-Antworten (Silent Refusal).
 DEPENDENCIES: app.config, app.models.dto, app.services.llm_service, app.core.retriever, app.services.feedback_service
 EXTERNAL_CONFIG: config/decision_engine.yaml, config/types.yaml
 """
@@ -36,7 +37,7 @@ def _load_decision_config() -> Dict[str, Any]:
     path = Path(settings.DECISION_CONFIG_PATH)
     default_config = {
         "strategies": {
-            "FACT": {"trigger_keywords": []}
+            "FACT": {"trigger_keywords": [], "preferred_provider": "openrouter"}
         }
     }
     
@@ -159,7 +160,7 @@ def _is_question(query: str) -> bool:
     q = query.strip().lower()
     if "?" in q: return True
     
-    # W-Fragen Indikatoren (falls User das ? vergisst)
+    # Question-starter indicators (catch questions where the user omitted the "?")
     starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du", "entspricht"]
     if any(q.startswith(s + " ") for s in starters):
         return True
@@ -170,7 +171,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
     """
     Hybrid Router v5: 
     1. Decision Keywords (Strategie) -> Prio 1
-    2. Type Keywords (Interview Trigger) -> Prio 2, ABER NUR WENN KEINE FRAGE!
+    2. Type Keywords (Interview Trigger) -> Prio 2, only when the query is not a question
     3. LLM (Fallback) -> Prio 3
     """
     config = get_full_config()
@@ -179,7 +180,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
     
     query_lower = query.lower()
     
-    # 1. FAST PATH A: Strategie Keywords (z.B. "Soll ich...")
+    # 1. FAST PATH A: Strategie Keywords
     for intent_name, strategy in strategies.items():
         if intent_name == "FACT": continue
         keywords = strategy.get("trigger_keywords", [])
@@ -187,7 +188,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
             if k.lower() in query_lower:
                 return intent_name, "Keyword (Strategy)"
     
-    # 2. FAST PATH B: Type Keywords (z.B. "Projekt", "Werte") -> INTERVIEW
+    # 2. FAST PATH B: Type Keywords -> INTERVIEW
     if not _is_question(query_lower):
         types_cfg = get_types_config()
         types_def = types_cfg.get("types", {})
@@ -200,8 +201,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
 
     # 3. SLOW PATH: LLM Router
     if settings.get("llm_fallback_enabled", False):
-        # FIX: Nutze get_prompt statt direktem Zugriff auf dict
-        router_prompt_template = llm.get_prompt("router_prompt")
+        router_prompt_template = llm.get_prompt("llm_router_prompt")
         
         if router_prompt_template:
             prompt = router_prompt_template.replace("{query}", query)
@@ -241,6 +241,7 @@ async def chat_endpoint(
         # Strategy Load
         strategy = get_decision_strategy(intent)
         prompt_key = strategy.get("prompt_template", "rag_template")
+        preferred_provider = strategy.get("preferred_provider") # Nutzt Konfiguration aus decision_engine.yaml
         
         sources_hits = []
         final_prompt = ""
@@ -264,7 +265,6 @@ async def chat_endpoint(
             logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")
             fields_str = "\n- " + "\n- ".join(fields_list)
             
-            # FIX: Nutze get_prompt() zur Auflösung der provider-spezifischen Templates
             template = llm.get_prompt(prompt_key)
             final_prompt = template.replace("{context_str}", "Dialogverlauf...") \
                                    .replace("{query}", request.message) \
@@ -274,14 +274,10 @@ async def chat_endpoint(
             sources_hits = []
             
         else:
-            # --- RAG MODE ---
+            # --- RAG MODE (FACT, DECISION, EMPATHY, CODING) ---
             inject_types = strategy.get("inject_types", [])
             prepend_instr = strategy.get("prepend_instruction", "")
-            
-            # --- WP-22: Semantic Graph Routing (Teil C) ---
             edge_boosts = strategy.get("edge_boosts", {})
-            if edge_boosts:
-                logger.info(f"[{query_id}] Applying Edge Boosts: {edge_boosts}")
 
             query_req = QueryRequest(
                 query=request.message,
@@ -308,33 +304,36 @@ async def chat_endpoint(
                     if strat_hit.node_id not in existing_ids:
                         hits.append(strat_hit)
 
-            if not hits:
-                context_str = "Keine relevanten Notizen gefunden."
-            else:
-                context_str = _build_enriched_context(hits)
-
-            # FIX: Nutze get_prompt() zur Auflösung der provider-spezifischen Templates
-            template = llm.get_prompt(prompt_key)
+            context_str = _build_enriched_context(hits) if hits else "Keine relevanten Notizen gefunden."
+            template = llm.get_prompt(prompt_key) or "{context_str}\n\n{query}"
             
-            if not template:
-                 template = "{context_str}\n\n{query}"
-
             if prepend_instr:
                  context_str = f"{prepend_instr}\n\n{context_str}"
 
             final_prompt = template.replace("{context_str}", context_str).replace("{query}", request.message)
             sources_hits = hits
         
-        # --- GENERATION ---
+        # --- GENERATION MIT DEEP FALLBACK ---
         system_prompt = llm.get_prompt("system_prompt")
         
-        # Chat nutzt IMMER realtime priority
+        # 1. Versuch mit konfiguriertem Provider (z.B. Ollama für EMPATHY)
         answer_text = await llm.generate_raw_response(
             prompt=final_prompt, 
             system=system_prompt,
-            priority="realtime"
+            priority="realtime",
+            provider=preferred_provider
         )
 
+        # DEEP FALLBACK: retry on local Ollama if the answer is empty (silent cloud refusal); also triggers when no provider is configured (None)
+        if not answer_text.strip() and preferred_provider != "ollama":
+            logger.warning(f"🛑 [{query_id}] Leere Antwort von '{preferred_provider}'. Starte LOKALEN FALLBACK via Ollama...")
+            answer_text = await llm.generate_raw_response(
+                prompt=final_prompt, 
+                system=system_prompt,
+                priority="realtime",
+                provider="ollama"
+            )
+
         duration_ms = int((time.time() - start_time) * 1000)
         
         # Logging
@@ -344,7 +343,7 @@ async def chat_endpoint(
                 query_text=request.message,
                 results=sources_hits,
                 mode="interview" if intent == "INTERVIEW" else "chat_rag",
-                metadata={"intent": intent, "source": intent_source}
+                metadata={"intent": intent, "source": intent_source, "provider": preferred_provider}
             )
         except: pass