Prompt verbessert und Chat-Router optimiert
This commit is contained in:
parent
bd44af2b68
commit
6c2074166c
|
|
@ -1,6 +1,6 @@
|
||||||
"""
|
"""
|
||||||
app/routers/chat.py — RAG Endpunkt (WP-06 Hybrid Router)
|
app/routers/chat.py — RAG Endpunkt (WP-06 Hybrid Router v2)
|
||||||
Version: 0.2.1 (Fix: System Prompt Separation)
|
Update: Robusteres LLM-Parsing für Small Language Models (SLMs).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException, Depends
|
from fastapi import APIRouter, HTTPException, Depends
|
||||||
|
|
@ -88,6 +88,11 @@ def _build_enriched_context(hits: List[QueryHit]) -> str:
|
||||||
return "\n\n".join(context_parts)
|
return "\n\n".join(context_parts)
|
||||||
|
|
||||||
async def _classify_intent(query: str, llm: LLMService) -> str:
|
async def _classify_intent(query: str, llm: LLMService) -> str:
|
||||||
|
"""
|
||||||
|
Hybrid Router v2:
|
||||||
|
1. Keyword Check (Best/Longest Match) -> FAST
|
||||||
|
2. LLM Fallback (Robust Parsing) -> SMART
|
||||||
|
"""
|
||||||
config = get_full_config()
|
config = get_full_config()
|
||||||
strategies = config.get("strategies", {})
|
strategies = config.get("strategies", {})
|
||||||
settings = config.get("settings", {})
|
settings = config.get("settings", {})
|
||||||
|
|
@ -96,7 +101,7 @@ async def _classify_intent(query: str, llm: LLMService) -> str:
|
||||||
best_intent = None
|
best_intent = None
|
||||||
max_match_length = 0
|
max_match_length = 0
|
||||||
|
|
||||||
# 1. FAST PATH
|
# 1. FAST PATH: Keywords
|
||||||
for intent_name, strategy in strategies.items():
|
for intent_name, strategy in strategies.items():
|
||||||
if intent_name == "FACT": continue
|
if intent_name == "FACT": continue
|
||||||
keywords = strategy.get("trigger_keywords", [])
|
keywords = strategy.get("trigger_keywords", [])
|
||||||
|
|
@ -110,29 +115,41 @@ async def _classify_intent(query: str, llm: LLMService) -> str:
|
||||||
logger.info(f"Intent detected via KEYWORD: {best_intent}")
|
logger.info(f"Intent detected via KEYWORD: {best_intent}")
|
||||||
return best_intent
|
return best_intent
|
||||||
|
|
||||||
# 2. SLOW PATH
|
# 2. SLOW PATH: LLM Router
|
||||||
if settings.get("llm_fallback_enabled", False):
|
if settings.get("llm_fallback_enabled", False):
|
||||||
router_prompt_template = settings.get("llm_router_prompt", "")
|
router_prompt_template = settings.get("llm_router_prompt", "")
|
||||||
if router_prompt_template:
|
if router_prompt_template:
|
||||||
prompt = router_prompt_template.replace("{query}", query)
|
prompt = router_prompt_template.replace("{query}", query)
|
||||||
logger.info("Keywords failed. Asking LLM for Intent...")
|
logger.info("Keywords failed. Asking LLM for Intent...")
|
||||||
|
|
||||||
# Router braucht keinen System-Prompt, nur den Classifier-Prompt
|
# Kurzer Raw Call
|
||||||
llm_decision = await llm.generate_raw_response(prompt)
|
raw_response = await llm.generate_raw_response(prompt)
|
||||||
|
|
||||||
llm_decision = llm_decision.strip().upper()
|
# --- Robust Parsing für SLMs ---
|
||||||
if ":" in llm_decision:
|
# Wir suchen nach den bekannten Strategie-Namen im Output
|
||||||
llm_decision = llm_decision.split(":")[-1].strip()
|
llm_output_upper = raw_response.upper()
|
||||||
|
logger.info(f"LLM Router Raw Output: '{raw_response}'") # Debugging
|
||||||
|
|
||||||
# Satzzeichen entfernen für sauberen Match
|
found_intents = []
|
||||||
llm_decision = ''.join(filter(str.isalnum, llm_decision))
|
for strat_key in strategies.keys():
|
||||||
|
# Wir prüfen, ob der Strategie-Name (z.B. "EMPATHY") im Text vorkommt
|
||||||
if llm_decision in strategies:
|
if strat_key in llm_output_upper:
|
||||||
logger.info(f"Intent detected via LLM: {llm_decision}")
|
found_intents.append(strat_key)
|
||||||
return llm_decision
|
|
||||||
|
# Entscheidung
|
||||||
|
final_intent = "FACT"
|
||||||
|
if len(found_intents) == 1:
|
||||||
|
# Eindeutiger Treffer
|
||||||
|
final_intent = found_intents[0]
|
||||||
|
logger.info(f"Intent detected via LLM (Parsed): {final_intent}")
|
||||||
|
return final_intent
|
||||||
|
elif len(found_intents) > 1:
|
||||||
|
# Mehrere Treffer (z.B. "Es ist FACT oder DECISION") -> Nimm den ersten oder Fallback
|
||||||
|
logger.warning(f"LLM returned multiple intents {found_intents}. Using first match: {found_intents[0]}")
|
||||||
|
return found_intents[0]
|
||||||
else:
|
else:
|
||||||
logger.warning(f"LLM predicted unknown intent '{llm_decision}', falling back to FACT.")
|
logger.warning(f"LLM did not return a valid strategy name. Falling back to FACT.")
|
||||||
|
|
||||||
return "FACT"
|
return "FACT"
|
||||||
|
|
||||||
@router.post("/", response_model=ChatResponse)
|
@router.post("/", response_model=ChatResponse)
|
||||||
|
|
@ -200,7 +217,7 @@ async def chat_endpoint(
|
||||||
|
|
||||||
logger.info(f"[{query_id}] Sending to LLM (Intent: {intent}, Template: {prompt_key})...")
|
logger.info(f"[{query_id}] Sending to LLM (Intent: {intent}, Template: {prompt_key})...")
|
||||||
|
|
||||||
# FIX: System-Prompt separat übergeben!
|
# System-Prompt separat übergeben
|
||||||
answer_text = await llm.generate_raw_response(prompt=final_prompt, system=system_prompt)
|
answer_text = await llm.generate_raw_response(prompt=final_prompt, system=system_prompt)
|
||||||
|
|
||||||
duration_ms = int((time.time() - start_time) * 1000)
|
duration_ms = int((time.time() - start_time) * 1000)
|
||||||
|
|
|
||||||
|
|
@ -1,22 +1,28 @@
|
||||||
# config/decision_engine.yaml
|
# config/decision_engine.yaml
|
||||||
# Steuerung der Decision Engine (WP-06)
|
# Steuerung der Decision Engine (WP-06)
|
||||||
# Hybrid-Modus: Keywords (Fast) + LLM Router (Smart Fallback)
|
# Hybrid-Modus: Keywords (Fast) + LLM Router (Smart Fallback)
|
||||||
version: 1.1
|
version: 1.2
|
||||||
|
|
||||||
settings:
|
settings:
|
||||||
# Schalter: Soll das LLM gefragt werden, wenn kein Keyword passt?
|
|
||||||
llm_fallback_enabled: true
|
llm_fallback_enabled: true
|
||||||
|
|
||||||
# Der Prompt für den "Semantic Router" (Slow Path)
|
# Few-Shot Prompting für bessere SLM-Performance
|
||||||
llm_router_prompt: |
|
llm_router_prompt: |
|
||||||
Analysiere die folgende Nachricht und entscheide, welche Strategie passt.
|
Du bist ein Klassifikator. Analysiere die Nachricht und wähle die passende Strategie.
|
||||||
Antworte NUR mit dem Namen der Strategie (ein Wort).
|
Antworte NUR mit dem Namen der Strategie.
|
||||||
|
|
||||||
STRATEGIEN:
|
STRATEGIEN:
|
||||||
- DECISION: User fragt nach Rat, Meinung, Strategie, Vor/Nachteilen.
|
- DECISION: Rat, Strategie, Vor/Nachteile, "Soll ich".
|
||||||
- EMPATHY: User äußert Gefühle, Frust, Freude oder persönliche Probleme.
|
- EMPATHY: Gefühle, Frust, Freude, Probleme, "Alles ist sinnlos", "Ich bin traurig".
|
||||||
- CODING: User fragt nach Code, Syntax oder Programmierung.
|
- CODING: Code, Syntax, Programmierung, Python.
|
||||||
- FACT: User fragt nach Wissen, Definitionen oder Fakten (Default).
|
- FACT: Wissen, Fakten, Definitionen.
|
||||||
|
|
||||||
|
BEISPIELE:
|
||||||
|
User: "Wie funktioniert Qdrant?" -> FACT
|
||||||
|
User: "Soll ich Qdrant nutzen?" -> DECISION
|
||||||
|
User: "Schreibe ein Python Script" -> CODING
|
||||||
|
User: "Alles ist grau und sinnlos" -> EMPATHY
|
||||||
|
User: "Mir geht es heute gut" -> EMPATHY
|
||||||
|
|
||||||
NACHRICHT: "{query}"
|
NACHRICHT: "{query}"
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user