Angepasst an die neue LLM-Logik
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
This commit is contained in:
parent
f6f3213b84
commit
83c0c9944d
|
|
@ -1,9 +1,10 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/routers/chat.py
|
FILE: app/routers/chat.py
|
||||||
DESCRIPTION: Haupt-Chat-Interface (RAG & Interview). Enthält Intent-Router (Keywords/LLM) und Prompt-Construction.
|
DESCRIPTION: Haupt-Chat-Interface (RAG & Interview). Enthält Intent-Router (Keywords/LLM) und Prompt-Construction.
|
||||||
VERSION: 2.7.1 (WP-22 Semantic Graph Routing)
|
VERSION: 2.7.2 (Deep Fallback Edition)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
FIX: Umstellung auf llm.get_prompt() zur Behebung des 500 Server Errors (Dictionary replace crash).
|
FIX: Respektiert preferred_provider aus decision_engine.yaml und implementiert
|
||||||
|
Deep Fallback Logik zur Vermeidung leerer Cloud-Antworten (Silent Refusal).
|
||||||
DEPENDENCIES: app.config, app.models.dto, app.services.llm_service, app.core.retriever, app.services.feedback_service
|
DEPENDENCIES: app.config, app.models.dto, app.services.llm_service, app.core.retriever, app.services.feedback_service
|
||||||
EXTERNAL_CONFIG: config/decision_engine.yaml, config/types.yaml
|
EXTERNAL_CONFIG: config/decision_engine.yaml, config/types.yaml
|
||||||
"""
|
"""
|
||||||
|
|
@ -36,7 +37,7 @@ def _load_decision_config() -> Dict[str, Any]:
|
||||||
path = Path(settings.DECISION_CONFIG_PATH)
|
path = Path(settings.DECISION_CONFIG_PATH)
|
||||||
default_config = {
|
default_config = {
|
||||||
"strategies": {
|
"strategies": {
|
||||||
"FACT": {"trigger_keywords": []}
|
"FACT": {"trigger_keywords": [], "preferred_provider": "openrouter"}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -159,7 +160,7 @@ def _is_question(query: str) -> bool:
|
||||||
q = query.strip().lower()
|
q = query.strip().lower()
|
||||||
if "?" in q: return True
|
if "?" in q: return True
|
||||||
|
|
||||||
# W-Fragen Indikatoren (falls User das ? vergisst)
|
# W-Fragen Indikatoren
|
||||||
starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du", "entspricht"]
|
starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du", "entspricht"]
|
||||||
if any(q.startswith(s + " ") for s in starters):
|
if any(q.startswith(s + " ") for s in starters):
|
||||||
return True
|
return True
|
||||||
|
|
@ -170,7 +171,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
|
||||||
"""
|
"""
|
||||||
Hybrid Router v5:
|
Hybrid Router v5:
|
||||||
1. Decision Keywords (Strategie) -> Prio 1
|
1. Decision Keywords (Strategie) -> Prio 1
|
||||||
2. Type Keywords (Interview Trigger) -> Prio 2, ABER NUR WENN KEINE FRAGE!
|
2. Type Keywords (Interview Trigger) -> Prio 2
|
||||||
3. LLM (Fallback) -> Prio 3
|
3. LLM (Fallback) -> Prio 3
|
||||||
"""
|
"""
|
||||||
config = get_full_config()
|
config = get_full_config()
|
||||||
|
|
@ -179,7 +180,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
|
||||||
|
|
||||||
query_lower = query.lower()
|
query_lower = query.lower()
|
||||||
|
|
||||||
# 1. FAST PATH A: Strategie Keywords (z.B. "Soll ich...")
|
# 1. FAST PATH A: Strategie Keywords
|
||||||
for intent_name, strategy in strategies.items():
|
for intent_name, strategy in strategies.items():
|
||||||
if intent_name == "FACT": continue
|
if intent_name == "FACT": continue
|
||||||
keywords = strategy.get("trigger_keywords", [])
|
keywords = strategy.get("trigger_keywords", [])
|
||||||
|
|
@ -187,7 +188,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
|
||||||
if k.lower() in query_lower:
|
if k.lower() in query_lower:
|
||||||
return intent_name, "Keyword (Strategy)"
|
return intent_name, "Keyword (Strategy)"
|
||||||
|
|
||||||
# 2. FAST PATH B: Type Keywords (z.B. "Projekt", "Werte") -> INTERVIEW
|
# 2. FAST PATH B: Type Keywords -> INTERVIEW
|
||||||
if not _is_question(query_lower):
|
if not _is_question(query_lower):
|
||||||
types_cfg = get_types_config()
|
types_cfg = get_types_config()
|
||||||
types_def = types_cfg.get("types", {})
|
types_def = types_cfg.get("types", {})
|
||||||
|
|
@ -200,8 +201,7 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
|
||||||
|
|
||||||
# 3. SLOW PATH: LLM Router
|
# 3. SLOW PATH: LLM Router
|
||||||
if settings.get("llm_fallback_enabled", False):
|
if settings.get("llm_fallback_enabled", False):
|
||||||
# FIX: Nutze get_prompt statt direktem Zugriff auf dict
|
router_prompt_template = llm.get_prompt("llm_router_prompt")
|
||||||
router_prompt_template = llm.get_prompt("router_prompt")
|
|
||||||
|
|
||||||
if router_prompt_template:
|
if router_prompt_template:
|
||||||
prompt = router_prompt_template.replace("{query}", query)
|
prompt = router_prompt_template.replace("{query}", query)
|
||||||
|
|
@ -241,6 +241,7 @@ async def chat_endpoint(
|
||||||
# Strategy Load
|
# Strategy Load
|
||||||
strategy = get_decision_strategy(intent)
|
strategy = get_decision_strategy(intent)
|
||||||
prompt_key = strategy.get("prompt_template", "rag_template")
|
prompt_key = strategy.get("prompt_template", "rag_template")
|
||||||
|
preferred_provider = strategy.get("preferred_provider") # Nutzt Konfiguration aus decision_engine.yaml
|
||||||
|
|
||||||
sources_hits = []
|
sources_hits = []
|
||||||
final_prompt = ""
|
final_prompt = ""
|
||||||
|
|
@ -264,7 +265,6 @@ async def chat_endpoint(
|
||||||
logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")
|
logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")
|
||||||
fields_str = "\n- " + "\n- ".join(fields_list)
|
fields_str = "\n- " + "\n- ".join(fields_list)
|
||||||
|
|
||||||
# FIX: Nutze get_prompt() zur Auflösung der provider-spezifischen Templates
|
|
||||||
template = llm.get_prompt(prompt_key)
|
template = llm.get_prompt(prompt_key)
|
||||||
final_prompt = template.replace("{context_str}", "Dialogverlauf...") \
|
final_prompt = template.replace("{context_str}", "Dialogverlauf...") \
|
||||||
.replace("{query}", request.message) \
|
.replace("{query}", request.message) \
|
||||||
|
|
@ -274,14 +274,10 @@ async def chat_endpoint(
|
||||||
sources_hits = []
|
sources_hits = []
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# --- RAG MODE ---
|
# --- RAG MODE (FACT, DECISION, EMPATHY, CODING) ---
|
||||||
inject_types = strategy.get("inject_types", [])
|
inject_types = strategy.get("inject_types", [])
|
||||||
prepend_instr = strategy.get("prepend_instruction", "")
|
prepend_instr = strategy.get("prepend_instruction", "")
|
||||||
|
|
||||||
# --- WP-22: Semantic Graph Routing (Teil C) ---
|
|
||||||
edge_boosts = strategy.get("edge_boosts", {})
|
edge_boosts = strategy.get("edge_boosts", {})
|
||||||
if edge_boosts:
|
|
||||||
logger.info(f"[{query_id}] Applying Edge Boosts: {edge_boosts}")
|
|
||||||
|
|
||||||
query_req = QueryRequest(
|
query_req = QueryRequest(
|
||||||
query=request.message,
|
query=request.message,
|
||||||
|
|
@ -308,16 +304,8 @@ async def chat_endpoint(
|
||||||
if strat_hit.node_id not in existing_ids:
|
if strat_hit.node_id not in existing_ids:
|
||||||
hits.append(strat_hit)
|
hits.append(strat_hit)
|
||||||
|
|
||||||
if not hits:
|
context_str = _build_enriched_context(hits) if hits else "Keine relevanten Notizen gefunden."
|
||||||
context_str = "Keine relevanten Notizen gefunden."
|
template = llm.get_prompt(prompt_key) or "{context_str}\n\n{query}"
|
||||||
else:
|
|
||||||
context_str = _build_enriched_context(hits)
|
|
||||||
|
|
||||||
# FIX: Nutze get_prompt() zur Auflösung der provider-spezifischen Templates
|
|
||||||
template = llm.get_prompt(prompt_key)
|
|
||||||
|
|
||||||
if not template:
|
|
||||||
template = "{context_str}\n\n{query}"
|
|
||||||
|
|
||||||
if prepend_instr:
|
if prepend_instr:
|
||||||
context_str = f"{prepend_instr}\n\n{context_str}"
|
context_str = f"{prepend_instr}\n\n{context_str}"
|
||||||
|
|
@ -325,16 +313,27 @@ async def chat_endpoint(
|
||||||
final_prompt = template.replace("{context_str}", context_str).replace("{query}", request.message)
|
final_prompt = template.replace("{context_str}", context_str).replace("{query}", request.message)
|
||||||
sources_hits = hits
|
sources_hits = hits
|
||||||
|
|
||||||
# --- GENERATION ---
|
# --- GENERATION MIT DEEP FALLBACK ---
|
||||||
system_prompt = llm.get_prompt("system_prompt")
|
system_prompt = llm.get_prompt("system_prompt")
|
||||||
|
|
||||||
# Chat nutzt IMMER realtime priority
|
# 1. Versuch mit konfiguriertem Provider (z.B. Ollama für EMPATHY)
|
||||||
answer_text = await llm.generate_raw_response(
|
answer_text = await llm.generate_raw_response(
|
||||||
prompt=final_prompt,
|
prompt=final_prompt,
|
||||||
system=system_prompt,
|
system=system_prompt,
|
||||||
priority="realtime"
|
priority="realtime",
|
||||||
|
provider=preferred_provider
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# DEEP FALLBACK: Wenn die Antwort leer ist (Silent Refusal in der Cloud)
|
||||||
|
if not answer_text.strip() and preferred_provider != "ollama":
|
||||||
|
logger.warning(f"🛑 [{query_id}] Leere Antwort von '{preferred_provider}'. Starte LOKALEN FALLBACK via Ollama...")
|
||||||
|
answer_text = await llm.generate_raw_response(
|
||||||
|
prompt=final_prompt,
|
||||||
|
system=system_prompt,
|
||||||
|
priority="realtime",
|
||||||
|
provider="ollama"
|
||||||
|
)
|
||||||
|
|
||||||
duration_ms = int((time.time() - start_time) * 1000)
|
duration_ms = int((time.time() - start_time) * 1000)
|
||||||
|
|
||||||
# Logging
|
# Logging
|
||||||
|
|
@ -344,7 +343,7 @@ async def chat_endpoint(
|
||||||
query_text=request.message,
|
query_text=request.message,
|
||||||
results=sources_hits,
|
results=sources_hits,
|
||||||
mode="interview" if intent == "INTERVIEW" else "chat_rag",
|
mode="interview" if intent == "INTERVIEW" else "chat_rag",
|
||||||
metadata={"intent": intent, "source": intent_source}
|
metadata={"intent": intent, "source": intent_source, "provider": preferred_provider}
|
||||||
)
|
)
|
||||||
except: pass
|
except: pass
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user