A question is identified by "?" or a question word
parent 65bcea71ee
commit 70ffa5cd4e
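In plain terms: type keywords such as "Projekt" no longer force the INTERVIEW intent when the input looks like a question; such queries fall through to the LLM router and stay in RAG (FACT/DECISION). A minimal, self-contained sketch of the new ordering, assuming made-up keyword tables — the real values come from decision_engine.yaml and types.yaml, and the classify helper below only illustrates what _classify_intent does in the diff:

# Illustrative sketch of the v5 routing order; the keyword tables are invented examples,
# not the configuration loaded from decision_engine.yaml / types.yaml.
STRATEGY_KEYWORDS = {"DECISION": ["soll ich", "entscheide"]}   # assumed example values
TYPE_KEYWORDS = {"projekt": ["projekt", "vorhaben"]}           # assumed example values

def _is_question(query: str) -> bool:
    q = query.strip().lower()
    if "?" in q:
        return True
    starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du", "entspricht"]
    return any(q.startswith(s + " ") for s in starters)

def classify(query: str) -> str:
    q = query.lower()
    # 1. Strategy keywords always win
    for intent, kws in STRATEGY_KEYWORDS.items():
        if any(k in q for k in kws):
            return intent
    # 2. Type keywords only trigger an interview if the input is NOT a question
    if not _is_question(q):
        for type_name, kws in TYPE_KEYWORDS.items():
            if any(k in q for k in kws):
                return "INTERVIEW"
    # 3. Otherwise the LLM router (slow path) decides; FACT is only a stand-in default here
    return "FACT"

print(classify("Welche Projekte habe ich gerade?"))  # FACT: question wins over the type keyword
print(classify("Neues Projekt starten"))             # INTERVIEW: type keyword, not a question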
@@ -1,14 +1,6 @@
 """
-app/routers/chat.py — RAG endpoint (WP-06 Hybrid Router + WP-07 Interview Mode)
-Version: 2.4.1 (Fix: Type-based Intent Detection)
-
-Features:
-- Hybrid Intent Router (Keyword + LLM)
-- Strategic Retrieval (Late Binding via Config)
-- Interview Loop (Schema-driven Data Collection)
-- Context Enrichment (Payload/Source Fallback)
-- Data Flywheel (Feedback Logging Integration)
-- NEW: Loads detection_keywords from types.yaml for precise detection.
+app/routers/chat.py — RAG endpoint
+Version: 2.5.0 (Fix: Question Detection protects against False-Positive Interviews)
 """

 from fastapi import APIRouter, HTTPException, Depends
@@ -78,8 +70,6 @@ def get_types_config() -> Dict[str, Any]:
 def get_decision_strategy(intent: str) -> Dict[str, Any]:
     config = get_full_config()
     strategies = config.get("strategies", {})
-    # Fallback: if the intent is INTERVIEW but not configured, use FACT
-    # (INTERVIEW should be defined in decision_engine.yaml, though!)
     return strategies.get(intent, strategies.get("FACT", {}))

 # --- Helper: Target Type Detection (WP-07) ---
@@ -159,12 +149,24 @@ def _build_enriched_context(hits: List[QueryHit]) -> str:
     return "\n\n".join(context_parts)


+def _is_question(query: str) -> bool:
+    """Checks whether the input is probably a question."""
+    q = query.strip().lower()
+    if "?" in q: return True
+
+    # Question-word indicators (in case the user forgets the "?")
+    starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du", "entspricht"]
+    if any(q.startswith(s + " ") for s in starters):
+        return True
+
+    return False
+
 async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
     """
-    Hybrid Router v4:
-    1. Decision Keywords (Strategy)
-    2. Type Keywords (Interview Trigger)
-    3. LLM (Fallback)
+    Hybrid Router v5:
+    1. Decision Keywords (Strategy) -> priority 1
+    2. Type Keywords (Interview Trigger) -> priority 2, BUT ONLY IF THE INPUT IS NOT A QUESTION!
+    3. LLM (Fallback) -> priority 3
     """
     config = get_full_config()
     strategies = config.get("strategies", {})
@@ -180,30 +182,35 @@ async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
            if k.lower() in query_lower:
                return intent_name, "Keyword (Strategy)"

-    # 2. FAST PATH B: Type Keywords (e.g. "Projekt", "passiert") -> INTERVIEW
-    # Check whether a type is detected. If so -> interview.
-    # We do not load schemas here; types.yaml is used globally
-    types_cfg = get_types_config()
-    types_def = types_cfg.get("types", {})
-
-    for type_name, type_data in types_def.items():
-        keywords = type_data.get("detection_keywords", [])
-        for kw in keywords:
-            if kw.lower() in query_lower:
-                return "INTERVIEW", f"Keyword (Type: {type_name})"
+    # 2. FAST PATH B: Type Keywords (e.g. "Projekt", "Werte") -> INTERVIEW
+    # FIX: Check whether the input is a question. Questions about types should go to RAG (FACT/DECISION),
+    # not into an interview. In that case we leave the decision to the LLM router (slow path).
+    if not _is_question(query_lower):
+        types_cfg = get_types_config()
+        types_def = types_cfg.get("types", {})
+
+        for type_name, type_data in types_def.items():
+            keywords = type_data.get("detection_keywords", [])
+            for kw in keywords:
+                if kw.lower() in query_lower:
+                    return "INTERVIEW", f"Keyword (Type: {type_name})"

    # 3. SLOW PATH: LLM Router
    if settings.get("llm_fallback_enabled", False):
-        router_prompt_template = settings.get("llm_router_prompt", "")
+        # Use the prompts from prompts.yaml (via the LLM service)
+        router_prompt_template = llm.prompts.get("router_prompt", "")

        if router_prompt_template:
            prompt = router_prompt_template.replace("{query}", query)
-            logger.info("Keywords failed. Asking LLM for Intent...")
+            logger.info("Keywords failed (or Question detected). Asking LLM for Intent...")

            try:
-                raw_response = await llm.generate_raw_response(prompt)
+                # Use priority="realtime" for the router so it does not have to wait
+                raw_response = await llm.generate_raw_response(prompt, priority="realtime")
                llm_output_upper = raw_response.upper()

-                # Check INTERVIEW first (LLMs often detect a "Create" intention)
+                # Check INTERVIEW first
                if "INTERVIEW" in llm_output_upper or "CREATE" in llm_output_upper:
                    return "INTERVIEW", "LLM Router"

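The slow path now pulls the router prompt from llm.prompts and calls the model with priority="realtime". A rough, self-contained sketch of that flow; DummyLLM, its canned answer, and the prompt text are placeholders rather than the real LLMService or the prompts.yaml content:

import asyncio

class DummyLLM:
    """Stand-in for LLMService: returns a canned answer instead of calling a model."""
    prompts = {"router_prompt": "Classify the intent (FACT, DECISION, INTERVIEW): {query}"}

    async def generate_raw_response(self, prompt: str, priority: str = "batch") -> str:
        return "INTERVIEW"  # pretend the model recognized a create/interview intention

async def slow_path(query: str, llm: DummyLLM) -> tuple[str, str]:
    router_prompt_template = llm.prompts.get("router_prompt", "")
    prompt = router_prompt_template.replace("{query}", query)
    # realtime priority, mirroring the change in the hunk above
    raw_response = await llm.generate_raw_response(prompt, priority="realtime")
    llm_output_upper = raw_response.upper()
    if "INTERVIEW" in llm_output_upper or "CREATE" in llm_output_upper:
        return "INTERVIEW", "LLM Router"
    return "FACT", "LLM Router"

print(asyncio.run(slow_path("Lege ein neues Projekt an", DummyLLM())))
# -> ('INTERVIEW', 'LLM Router')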
@@ -240,23 +247,12 @@ async def chat_endpoint(

    if intent == "INTERVIEW":
        # --- INTERVIEW MODE ---
-        # We now need to figure out WHICH schema to use.
-        # For that we look into types.yaml again (via _detect_target_type)
-
-        # Load schemas from decision_engine.yaml (in case there are overrides there)
-        # or build them generically from types.yaml (better!)
-
-        # Strategy: we use _detect_target_type, which now knows about types.yaml.
        target_type = _detect_target_type(request.message, strategy.get("schemas", {}))
-
-        # Load the schema (preferably from types.yaml)
        types_cfg = get_types_config()
        type_def = types_cfg.get("types", {}).get(target_type, {})

-        # Get the schema fields from types.yaml (schema: [...])
        fields_list = type_def.get("schema", [])

-        # Fallback to decision_engine.yaml if types.yaml does not define anything
        if not fields_list:
            configured_schemas = strategy.get("schemas", {})
            fallback_schema = configured_schemas.get(target_type, configured_schemas.get("default"))
@@ -266,17 +262,14 @@ async def chat_endpoint(
            fields_list = fallback_schema or []

        logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")

        fields_str = "\n- " + "\n- ".join(fields_list)

-        # Prompt Assembly
        template = llm.prompts.get(prompt_key, "")
        final_prompt = template.replace("{context_str}", "Dialogverlauf...") \
            .replace("{query}", request.message) \
            .replace("{target_type}", target_type) \
            .replace("{schema_fields}", fields_str) \
            .replace("{schema_hint}", "")

        sources_hits = []

    else:
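In the interview branch, the schema list is now taken from types.yaml first and only falls back to the strategy's schemas from decision_engine.yaml. A small sketch of that lookup with invented config dictionaries — the keys (types, schema, schemas, default) mirror the code above, but the values and the fixed target_type are examples, not the real configuration:

# Invented example data; in the endpoint these come from get_types_config()
# and from the strategy loaded out of decision_engine.yaml.
types_cfg = {"types": {"projekt": {"schema": ["titel", "ziel", "deadline"]}}}
strategy = {"schemas": {"default": ["titel", "beschreibung"]}}

target_type = "projekt"  # what _detect_target_type would return for the message

# Prefer the schema list from types.yaml ...
type_def = types_cfg.get("types", {}).get(target_type, {})
fields_list = type_def.get("schema", [])

# ... and fall back to decision_engine.yaml if it is empty.
if not fields_list:
    configured_schemas = strategy.get("schemas", {})
    fields_list = configured_schemas.get(target_type, configured_schemas.get("default")) or []

fields_str = "\n- " + "\n- ".join(fields_list)
print(fields_str)
# ->
# - titel
# - ziel
# - deadline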
@@ -323,8 +316,12 @@ async def chat_endpoint(
        # --- GENERATION ---
        system_prompt = llm.prompts.get("system_prompt", "")

-        # Here we rely on the increased timeout from the LLMService update
-        answer_text = await llm.generate_raw_response(prompt=final_prompt, system=system_prompt)
+        # Chat ALWAYS uses realtime priority
+        answer_text = await llm.generate_raw_response(
+            prompt=final_prompt,
+            system=system_prompt,
+            priority="realtime"
+        )

        duration_ms = int((time.time() - start_time) * 1000)