Angepasst an die neue LLM-Logik
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
This commit is contained in:
parent
f6f3213b84
commit
83c0c9944d
|
|
@@ -1,9 +1,10 @@
|
|||
"""
|
||||
FILE: app/routers/chat.py
|
||||
DESCRIPTION: Haupt-Chat-Interface (RAG & Interview). Enthält Intent-Router (Keywords/LLM) und Prompt-Construction.
|
||||
VERSION: 2.7.1 (WP-22 Semantic Graph Routing)
|
||||
VERSION: 2.7.2 (Deep Fallback Edition)
|
||||
STATUS: Active
|
||||
FIX: Umstellung auf llm.get_prompt() zur Behebung des 500 Server Errors (Dictionary replace crash).
|
||||
FIX: Respektiert preferred_provider aus decision_engine.yaml und implementiert
|
||||
Deep Fallback Logik zur Vermeidung leerer Cloud-Antworten (Silent Refusal).
|
||||
DEPENDENCIES: app.config, app.models.dto, app.services.llm_service, app.core.retriever, app.services.feedback_service
|
||||
EXTERNAL_CONFIG: config/decision_engine.yaml, config/types.yaml
|
||||
"""
|
||||
|
|
@@ -36,7 +37,7 @@ def _load_decision_config() -> Dict[str, Any]:
|
|||
path = Path(settings.DECISION_CONFIG_PATH)
|
||||
default_config = {
|
||||
"strategies": {
|
||||
"FACT": {"trigger_keywords": []}
|
||||
"FACT": {"trigger_keywords": [], "preferred_provider": "openrouter"}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@@ -159,7 +160,7 @@ def _is_question(query: str) -> bool:
|
|||
q = query.strip().lower()
|
||||
if "?" in q: return True
|
||||
|
||||
# W-Fragen Indikatoren (falls User das ? vergisst)
|
||||
# W-Fragen Indikatoren
|
||||
starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du", "entspricht"]
|
||||
if any(q.startswith(s + " ") for s in starters):
|
||||
return True
|
||||
|
|
@@ -170,7 +171,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
|
|||
"""
|
||||
Hybrid Router v5:
|
||||
1. Decision Keywords (Strategie) -> Prio 1
|
||||
2. Type Keywords (Interview Trigger) -> Prio 2, ABER NUR WENN KEINE FRAGE!
|
||||
2. Type Keywords (Interview Trigger) -> Prio 2
|
||||
3. LLM (Fallback) -> Prio 3
|
||||
"""
|
||||
config = get_full_config()
|
||||
|
|
@@ -179,7 +180,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
|
|||
|
||||
query_lower = query.lower()
|
||||
|
||||
# 1. FAST PATH A: Strategie Keywords (z.B. "Soll ich...")
|
||||
# 1. FAST PATH A: Strategie Keywords
|
||||
for intent_name, strategy in strategies.items():
|
||||
if intent_name == "FACT": continue
|
||||
keywords = strategy.get("trigger_keywords", [])
|
||||
|
|
@@ -187,7 +188,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
|
|||
if k.lower() in query_lower:
|
||||
return intent_name, "Keyword (Strategy)"
|
||||
|
||||
# 2. FAST PATH B: Type Keywords (z.B. "Projekt", "Werte") -> INTERVIEW
|
||||
# 2. FAST PATH B: Type Keywords -> INTERVIEW
|
||||
if not _is_question(query_lower):
|
||||
types_cfg = get_types_config()
|
||||
types_def = types_cfg.get("types", {})
|
||||
|
|
@@ -200,8 +201,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
|
|||
|
||||
# 3. SLOW PATH: LLM Router
|
||||
if settings.get("llm_fallback_enabled", False):
|
||||
# FIX: Nutze get_prompt statt direktem Zugriff auf dict
|
||||
router_prompt_template = llm.get_prompt("router_prompt")
|
||||
router_prompt_template = llm.get_prompt("llm_router_prompt")
|
||||
|
||||
if router_prompt_template:
|
||||
prompt = router_prompt_template.replace("{query}", query)
|
||||
|
|
@@ -241,6 +241,7 @@ async def chat_endpoint(
|
|||
# Strategy Load
|
||||
strategy = get_decision_strategy(intent)
|
||||
prompt_key = strategy.get("prompt_template", "rag_template")
|
||||
preferred_provider = strategy.get("preferred_provider") # Nutzt Konfiguration aus decision_engine.yaml
|
||||
|
||||
sources_hits = []
|
||||
final_prompt = ""
|
||||
|
|
@@ -264,7 +265,6 @@ async def chat_endpoint(
|
|||
logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")
|
||||
fields_str = "\n- " + "\n- ".join(fields_list)
|
||||
|
||||
# FIX: Nutze get_prompt() zur Auflösung der provider-spezifischen Templates
|
||||
template = llm.get_prompt(prompt_key)
|
||||
final_prompt = template.replace("{context_str}", "Dialogverlauf...") \
|
||||
.replace("{query}", request.message) \
|
||||
|
|
@@ -274,14 +274,10 @@ async def chat_endpoint(
|
|||
sources_hits = []
|
||||
|
||||
else:
|
||||
# --- RAG MODE ---
|
||||
# --- RAG MODE (FACT, DECISION, EMPATHY, CODING) ---
|
||||
inject_types = strategy.get("inject_types", [])
|
||||
prepend_instr = strategy.get("prepend_instruction", "")
|
||||
|
||||
# --- WP-22: Semantic Graph Routing (Teil C) ---
|
||||
edge_boosts = strategy.get("edge_boosts", {})
|
||||
if edge_boosts:
|
||||
logger.info(f"[{query_id}] Applying Edge Boosts: {edge_boosts}")
|
||||
|
||||
query_req = QueryRequest(
|
||||
query=request.message,
|
||||
|
|
@@ -308,16 +304,8 @@ async def chat_endpoint(
|
|||
if strat_hit.node_id not in existing_ids:
|
||||
hits.append(strat_hit)
|
||||
|
||||
if not hits:
|
||||
context_str = "Keine relevanten Notizen gefunden."
|
||||
else:
|
||||
context_str = _build_enriched_context(hits)
|
||||
|
||||
# FIX: Nutze get_prompt() zur Auflösung der provider-spezifischen Templates
|
||||
template = llm.get_prompt(prompt_key)
|
||||
|
||||
if not template:
|
||||
template = "{context_str}\n\n{query}"
|
||||
context_str = _build_enriched_context(hits) if hits else "Keine relevanten Notizen gefunden."
|
||||
template = llm.get_prompt(prompt_key) or "{context_str}\n\n{query}"
|
||||
|
||||
if prepend_instr:
|
||||
context_str = f"{prepend_instr}\n\n{context_str}"
|
||||
|
|
@@ -325,14 +313,25 @@ async def chat_endpoint(
|
|||
final_prompt = template.replace("{context_str}", context_str).replace("{query}", request.message)
|
||||
sources_hits = hits
|
||||
|
||||
# --- GENERATION ---
|
||||
# --- GENERATION MIT DEEP FALLBACK ---
|
||||
system_prompt = llm.get_prompt("system_prompt")
|
||||
|
||||
# Chat nutzt IMMER realtime priority
|
||||
# 1. Versuch mit konfiguriertem Provider (z.B. Ollama für EMPATHY)
|
||||
answer_text = await llm.generate_raw_response(
|
||||
prompt=final_prompt,
|
||||
system=system_prompt,
|
||||
priority="realtime"
|
||||
priority="realtime",
|
||||
provider=preferred_provider
|
||||
)
|
||||
|
||||
# DEEP FALLBACK: Wenn die Antwort leer ist (Silent Refusal in der Cloud)
|
||||
if not answer_text.strip() and preferred_provider != "ollama":
|
||||
logger.warning(f"🛑 [{query_id}] Leere Antwort von '{preferred_provider}'. Starte LOKALEN FALLBACK via Ollama...")
|
||||
answer_text = await llm.generate_raw_response(
|
||||
prompt=final_prompt,
|
||||
system=system_prompt,
|
||||
priority="realtime",
|
||||
provider="ollama"
|
||||
)
|
||||
|
||||
duration_ms = int((time.time() - start_time) * 1000)
|
||||
|
|
@@ -344,7 +343,7 @@ async def chat_endpoint(
|
|||
query_text=request.message,
|
||||
results=sources_hits,
|
||||
mode="interview" if intent == "INTERVIEW" else "chat_rag",
|
||||
metadata={"intent": intent, "source": intent_source}
|
||||
metadata={"intent": intent, "source": intent_source, "provider": preferred_provider}
|
||||
)
|
||||
except: pass
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user