Angepasst an die neue LLM-Logik
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s

This commit is contained in:
Lars 2025-12-26 05:11:48 +01:00
parent f6f3213b84
commit 83c0c9944d

View File

@ -1,9 +1,10 @@
"""
FILE: app/routers/chat.py
DESCRIPTION: Haupt-Chat-Interface (RAG & Interview). Enthält Intent-Router (Keywords/LLM) und Prompt-Construction.
VERSION: 2.7.1 (WP-22 Semantic Graph Routing)
VERSION: 2.7.2 (Deep Fallback Edition)
STATUS: Active
FIX: Umstellung auf llm.get_prompt() zur Behebung des 500 Server Errors (Dictionary replace crash).
FIX: Respektiert preferred_provider aus decision_engine.yaml und implementiert eine
Deep-Fallback-Logik zur Vermeidung leerer Cloud-Antworten (Silent Refusal).
DEPENDENCIES: app.config, app.models.dto, app.services.llm_service, app.core.retriever, app.services.feedback_service
EXTERNAL_CONFIG: config/decision_engine.yaml, config/types.yaml
"""
@ -36,7 +37,7 @@ def _load_decision_config() -> Dict[str, Any]:
path = Path(settings.DECISION_CONFIG_PATH)
default_config = {
"strategies": {
"FACT": {"trigger_keywords": []}
"FACT": {"trigger_keywords": [], "preferred_provider": "openrouter"}
}
}
@ -159,7 +160,7 @@ def _is_question(query: str) -> bool:
q = query.strip().lower()
if "?" in q: return True
# W-Fragen Indikatoren (falls User das ? vergisst)
# W-Fragen Indikatoren
starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du", "entspricht"]
if any(q.startswith(s + " ") for s in starters):
return True
@ -170,7 +171,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
"""
Hybrid Router v5:
1. Decision Keywords (Strategie) -> Prio 1
2. Type Keywords (Interview Trigger) -> Prio 2, ABER NUR WENN KEINE FRAGE!
2. Type Keywords (Interview Trigger) -> Prio 2
3. LLM (Fallback) -> Prio 3
"""
config = get_full_config()
@ -179,7 +180,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
query_lower = query.lower()
# 1. FAST PATH A: Strategie Keywords (z.B. "Soll ich...")
# 1. FAST PATH A: Strategie Keywords
for intent_name, strategy in strategies.items():
if intent_name == "FACT": continue
keywords = strategy.get("trigger_keywords", [])
@ -187,7 +188,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
if k.lower() in query_lower:
return intent_name, "Keyword (Strategy)"
# 2. FAST PATH B: Type Keywords (z.B. "Projekt", "Werte") -> INTERVIEW
# 2. FAST PATH B: Type Keywords -> INTERVIEW
if not _is_question(query_lower):
types_cfg = get_types_config()
types_def = types_cfg.get("types", {})
@ -200,8 +201,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
# 3. SLOW PATH: LLM Router
if settings.get("llm_fallback_enabled", False):
# FIX: Nutze get_prompt statt direktem Zugriff auf dict
router_prompt_template = llm.get_prompt("router_prompt")
router_prompt_template = llm.get_prompt("llm_router_prompt")
if router_prompt_template:
prompt = router_prompt_template.replace("{query}", query)
@ -241,6 +241,7 @@ async def chat_endpoint(
# Strategy Load
strategy = get_decision_strategy(intent)
prompt_key = strategy.get("prompt_template", "rag_template")
preferred_provider = strategy.get("preferred_provider") # Nutzt Konfiguration aus decision_engine.yaml
sources_hits = []
final_prompt = ""
@ -264,7 +265,6 @@ async def chat_endpoint(
logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")
fields_str = "\n- " + "\n- ".join(fields_list)
# FIX: Nutze get_prompt() zur Auflösung der provider-spezifischen Templates
template = llm.get_prompt(prompt_key)
final_prompt = template.replace("{context_str}", "Dialogverlauf...") \
.replace("{query}", request.message) \
@ -274,14 +274,10 @@ async def chat_endpoint(
sources_hits = []
else:
# --- RAG MODE ---
# --- RAG MODE (FACT, DECISION, EMPATHY, CODING) ---
inject_types = strategy.get("inject_types", [])
prepend_instr = strategy.get("prepend_instruction", "")
# --- WP-22: Semantic Graph Routing (Teil C) ---
edge_boosts = strategy.get("edge_boosts", {})
if edge_boosts:
logger.info(f"[{query_id}] Applying Edge Boosts: {edge_boosts}")
query_req = QueryRequest(
query=request.message,
@ -308,16 +304,8 @@ async def chat_endpoint(
if strat_hit.node_id not in existing_ids:
hits.append(strat_hit)
if not hits:
context_str = "Keine relevanten Notizen gefunden."
else:
context_str = _build_enriched_context(hits)
# FIX: Nutze get_prompt() zur Auflösung der provider-spezifischen Templates
template = llm.get_prompt(prompt_key)
if not template:
template = "{context_str}\n\n{query}"
context_str = _build_enriched_context(hits) if hits else "Keine relevanten Notizen gefunden."
template = llm.get_prompt(prompt_key) or "{context_str}\n\n{query}"
if prepend_instr:
context_str = f"{prepend_instr}\n\n{context_str}"
@ -325,14 +313,25 @@ async def chat_endpoint(
final_prompt = template.replace("{context_str}", context_str).replace("{query}", request.message)
sources_hits = hits
# --- GENERATION ---
# --- GENERATION MIT DEEP FALLBACK ---
system_prompt = llm.get_prompt("system_prompt")
# Chat nutzt IMMER realtime priority
# 1. Versuch mit konfiguriertem Provider (z.B. Ollama für EMPATHY)
answer_text = await llm.generate_raw_response(
prompt=final_prompt,
system=system_prompt,
priority="realtime"
priority="realtime",
provider=preferred_provider
)
# DEEP FALLBACK: Wenn die Antwort leer ist (Silent Refusal in der Cloud)
if not answer_text.strip() and preferred_provider != "ollama":
logger.warning(f"🛑 [{query_id}] Leere Antwort von '{preferred_provider}'. Starte LOKALEN FALLBACK via Ollama...")
answer_text = await llm.generate_raw_response(
prompt=final_prompt,
system=system_prompt,
priority="realtime",
provider="ollama"
)
duration_ms = int((time.time() - start_time) * 1000)
@ -344,7 +343,7 @@ async def chat_endpoint(
query_text=request.message,
results=sources_hits,
mode="interview" if intent == "INTERVIEW" else "chat_rag",
metadata={"intent": intent, "source": intent_source}
metadata={"intent": intent, "source": intent_source, "provider": preferred_provider}
)
except: pass