Prompt verbessert und Chat-Router optimiert

2025-12-09 13:50:18 +01:00 · 2025-12-09 13:50:18 +01:00 · 6c2074166c
commit 6c2074166c
parent bd44af2b68
2 changed files with 50 additions and 27 deletions
--- a/app/routers/chat.py
+++ b/app/routers/chat.py
@@ -1,6 +1,6 @@
 """
-app/routers/chat.py — RAG Endpunkt (WP-06 Hybrid Router)
+app/routers/chat.py — RAG Endpunkt (WP-06 Hybrid Router v2)
-Version: 0.2.1 (Fix: System Prompt Separation)
+Update: Robusteres LLM-Parsing für Small Language Models (SLMs).
 """
 from fastapi import APIRouter, HTTPException, Depends
@@ -88,6 +88,11 @@ def _build_enriched_context(hits: List[QueryHit]) -> str:
    return "\n\n".join(context_parts)
 async def _classify_intent(query: str, llm: LLMService) -> str:
    """
    Hybrid Router v2: 
    1. Keyword Check (Best/Longest Match) -> FAST
    2. LLM Fallback (Robust Parsing) -> SMART
    """
    config = get_full_config()
    strategies = config.get("strategies", {})
    settings = config.get("settings", {})
@@ -96,7 +101,7 @@ async def _classify_intent(query: str, llm: LLMService) -> str:
    best_intent = None
    max_match_length = 0
-    # 1. FAST PATH
+    # 1. FAST PATH: Keywords
    for intent_name, strategy in strategies.items():
        if intent_name == "FACT": continue
        keywords = strategy.get("trigger_keywords", [])
@@ -110,29 +115,41 @@ async def _classify_intent(query: str, llm: LLMService) -> str:
        logger.info(f"Intent detected via KEYWORD: {best_intent}")
        return best_intent
-    # 2. SLOW PATH
+    # 2. SLOW PATH: LLM Router
    if settings.get("llm_fallback_enabled", False):
        router_prompt_template = settings.get("llm_router_prompt", "")
        if router_prompt_template:
            prompt = router_prompt_template.replace("{query}", query)
            logger.info("Keywords failed. Asking LLM for Intent...")
-            # Router braucht keinen System-Prompt, nur den Classifier-Prompt
+            # Kurzer Raw Call
-            llm_decision = await llm.generate_raw_response(prompt)
+            raw_response = await llm.generate_raw_response(prompt)
-            llm_decision = llm_decision.strip().upper()
+            # --- Robust Parsing für SLMs ---
-            if ":" in llm_decision:
+            # Wir suchen nach den bekannten Strategie-Namen im Output
-                llm_decision = llm_decision.split(":")[-1].strip()
+            llm_output_upper = raw_response.upper()
            logger.info(f"LLM Router Raw Output: '{raw_response}'") # Debugging
-            # Satzzeichen entfernen für sauberen Match
+            found_intents = []
-            llm_decision = ''.join(filter(str.isalnum, llm_decision))
+            for strat_key in strategies.keys():
-
+                # Wir prüfen, ob der Strategie-Name (z.B. "EMPATHY") im Text vorkommt
-            if llm_decision in strategies:
+                if strat_key in llm_output_upper:
-                logger.info(f"Intent detected via LLM: {llm_decision}")
+                    found_intents.append(strat_key)
-                return llm_decision
+            
            # Entscheidung
            final_intent = "FACT"
            if len(found_intents) == 1:
                # Eindeutiger Treffer
                final_intent = found_intents[0]
                logger.info(f"Intent detected via LLM (Parsed): {final_intent}")
                return final_intent
            elif len(found_intents) > 1:
                # Mehrere Treffer (z.B. "Es ist FACT oder DECISION") -> Nimm den ersten oder Fallback
                logger.warning(f"LLM returned multiple intents {found_intents}. Using first match: {found_intents[0]}")
                return found_intents[0]
            else:
-                logger.warning(f"LLM predicted unknown intent '{llm_decision}', falling back to FACT.")
+                logger.warning(f"LLM did not return a valid strategy name. Falling back to FACT.")
-
+                
    return "FACT"
@router.post("/", response_model=ChatResponse)
@@ -200,7 +217,7 @@ async def chat_endpoint(
        logger.info(f"[{query_id}] Sending to LLM (Intent: {intent}, Template: {prompt_key})...")
-        # FIX: System-Prompt separat übergeben!
+        # System-Prompt separat übergeben
        answer_text = await llm.generate_raw_response(prompt=final_prompt, system=system_prompt)
        duration_ms = int((time.time() - start_time) * 1000)
--- a/config/decision_engine.yaml
+++ b/config/decision_engine.yaml
@@ -1,22 +1,28 @@
 # config/decision_engine.yaml
 # Steuerung der Decision Engine (WP-06)
 # Hybrid-Modus: Keywords (Fast) + LLM Router (Smart Fallback)
-version: 1.1
+version: 1.2
 settings:
  # Schalter: Soll das LLM gefragt werden, wenn kein Keyword passt?
  llm_fallback_enabled: true
-  # Der Prompt für den "Semantic Router" (Slow Path)
+  # Few-Shot Prompting für bessere SLM-Performance
  llm_router_prompt: |
-    Analysiere die folgende Nachricht und entscheide, welche Strategie passt.
+    Du bist ein Klassifikator. Analysiere die Nachricht und wähle die passende Strategie.
-    Antworte NUR mit dem Namen der Strategie (ein Wort).
+    Antworte NUR mit dem Namen der Strategie.
    STRATEGIEN:
-    - DECISION: User fragt nach Rat, Meinung, Strategie, Vor/Nachteilen.
+    - DECISION: Rat, Strategie, Vor/Nachteile, "Soll ich".
-    - EMPATHY: User äußert Gefühle, Frust, Freude oder persönliche Probleme.
+    - EMPATHY: Gefühle, Frust, Freude, Probleme, "Alles ist sinnlos", "Ich bin traurig".
-    - CODING: User fragt nach Code, Syntax oder Programmierung.
+    - CODING: Code, Syntax, Programmierung, Python.
-    - FACT: User fragt nach Wissen, Definitionen oder Fakten (Default).
+    - FACT: Wissen, Fakten, Definitionen.
    BEISPIELE:
    User: "Wie funktioniert Qdrant?" -> FACT
    User: "Soll ich Qdrant nutzen?" -> DECISION
    User: "Schreibe ein Python Script" -> CODING
    User: "Alles ist grau und sinnlos" -> EMPATHY
    User: "Mir geht es heute gut" -> EMPATHY
    NACHRICHT: "{query}"