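Update Decision Engine for WP-25b: Make intent processing more robust by stripping model stop markers (e.g. "[/S]", "</s>") from the router output and keeping only its first word. Improve strategy determination by validating the cleaned intent against the strategies configured in decision_engine.yaml, falling back to FACT_WHAT when the intent is unknown. Streamline response generation by passing only prompt_key and variables to the LLMService (lazy loading), so prompt formatting happens after the model profile is resolved. Bump version to 1.3.1 to reflect these changes.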
This commit is contained in:
parent
7026fc4fed
commit
38fac89f73
|
|
@ -3,13 +3,13 @@ FILE: app/core/retrieval/decision_engine.py
|
||||||
DESCRIPTION: Der Agentic Orchestrator für MindNet (WP-25b Edition).
|
DESCRIPTION: Der Agentic Orchestrator für MindNet (WP-25b Edition).
|
||||||
Realisiert Multi-Stream Retrieval, Intent-basiertes Routing
|
Realisiert Multi-Stream Retrieval, Intent-basiertes Routing
|
||||||
und die neue Lazy-Prompt Orchestrierung (Module A & B).
|
und die neue Lazy-Prompt Orchestrierung (Module A & B).
|
||||||
VERSION: 1.3.0 (WP-25b: Lazy Prompt Orchestration)
|
VERSION: 1.3.1 (WP-25b: Robust Intent Cleaning & Lazy Loading)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
FIX:
|
FIX:
|
||||||
|
- WP-25b: Robuste Bereinigung von Intent-Strings (Fix: CODING[/S] -> CODING).
|
||||||
- WP-25b: Umstellung auf Lazy-Loading (Übergabe von prompt_key + variables).
|
- WP-25b: Umstellung auf Lazy-Loading (Übergabe von prompt_key + variables).
|
||||||
- WP-25b: Entfernung lokaler String-Formatierung zur Ermöglichung modell-spezifischer Prompts.
|
- WP-25a: Voller Erhalt der Profil-Kaskade via LLMService v3.5.5.
|
||||||
- WP-25a: Volle Integration der Profil-Kaskade via LLMService v3.5.5.
|
- WP-25: Beibehaltung von Stream-Tracing, Edge-Boosts und Pre-Initialization.
|
||||||
- WP-25: Beibehaltung von Stream-Tracing und Pre-Initialization Robustness.
|
|
||||||
"""
|
"""
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
|
|
@ -76,14 +76,13 @@ class DecisionEngine:
|
||||||
return await self._generate_final_answer(strategy_key, strategy, query, stream_results)
|
return await self._generate_final_answer(strategy_key, strategy, query, stream_results)
|
||||||
|
|
||||||
async def _determine_strategy(self, query: str) -> str:
|
async def _determine_strategy(self, query: str) -> str:
|
||||||
"""WP-25b: Nutzt den LLM-Router via Lazy-Loading prompt_key."""
|
"""WP-25b: Nutzt den LLM-Router via Lazy-Loading und bereinigt Modell-Artefakte."""
|
||||||
settings_cfg = self.config.get("settings", {})
|
settings_cfg = self.config.get("settings", {})
|
||||||
prompt_key = settings_cfg.get("router_prompt_key", "intent_router_v1")
|
prompt_key = settings_cfg.get("router_prompt_key", "intent_router_v1")
|
||||||
router_profile = settings_cfg.get("router_profile")
|
router_profile = settings_cfg.get("router_profile")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# WP-25b: Keine manuelle Formatierung mehr. Wir übergeben nur Key und Variablen.
|
# WP-25b: Delegation an LLMService ohne manuelle Vor-Formatierung.
|
||||||
# Der LLMService wählt den passenden Prompt für das router_profile Modell.
|
|
||||||
response = await self.llm_service.generate_raw_response(
|
response = await self.llm_service.generate_raw_response(
|
||||||
prompt_key=prompt_key,
|
prompt_key=prompt_key,
|
||||||
variables={"query": query},
|
variables={"query": query},
|
||||||
|
|
@ -91,7 +90,20 @@ class DecisionEngine:
|
||||||
priority="realtime",
|
priority="realtime",
|
||||||
profile_name=router_profile
|
profile_name=router_profile
|
||||||
)
|
)
|
||||||
return str(response).strip().upper()
|
|
||||||
|
# WP-25b FIX: Bereinigung von Stop-Markern wie [/S] oder </s>
|
||||||
|
raw_intent = str(response).replace("[/S]", "").replace("</s>", "").strip().upper()
|
||||||
|
|
||||||
|
# Robustheit: Nur das erste Wort nehmen, falls das Modell zu viel plaudert
|
||||||
|
intent = raw_intent.split()[0] if raw_intent else "FACT_WHAT"
|
||||||
|
|
||||||
|
# Validierung gegen bekannte Strategien aus der decision_engine.yaml
|
||||||
|
known_strategies = self.config.get("strategies", {}).keys()
|
||||||
|
if intent not in known_strategies:
|
||||||
|
logger.warning(f"⚠️ Unmapped intent '{intent}' from router. Falling back.")
|
||||||
|
return "FACT_WHAT"
|
||||||
|
|
||||||
|
return intent
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Strategy Routing failed: {e}")
|
logger.error(f"Strategy Routing failed: {e}")
|
||||||
return "FACT_WHAT"
|
return "FACT_WHAT"
|
||||||
|
|
@ -150,8 +162,7 @@ class DecisionEngine:
|
||||||
async def _compress_stream_content(self, stream_name: str, content: str, query: str, profile: Optional[str]) -> str:
|
async def _compress_stream_content(self, stream_name: str, content: str, query: str, profile: Optional[str]) -> str:
|
||||||
"""WP-25b Module A: Inhaltsverdichtung via Lazy-Loading 'compression_template'."""
|
"""WP-25b Module A: Inhaltsverdichtung via Lazy-Loading 'compression_template'."""
|
||||||
try:
|
try:
|
||||||
# WP-25b: Wir übergeben den Auftrag an den LLMService.
|
# WP-25b: Delegation der Inhaltsverdichtung an den LLMService.
|
||||||
# Das Modell-spezifische Template wird erst beim Call aufgelöst.
|
|
||||||
summary = await self.llm_service.generate_raw_response(
|
summary = await self.llm_service.generate_raw_response(
|
||||||
prompt_key="compression_template",
|
prompt_key="compression_template",
|
||||||
variables={
|
variables={
|
||||||
|
|
@ -169,7 +180,7 @@ class DecisionEngine:
|
||||||
return content
|
return content
|
||||||
|
|
||||||
async def _run_single_stream(self, name: str, cfg: Dict, query: str) -> QueryResponse:
|
async def _run_single_stream(self, name: str, cfg: Dict, query: str) -> QueryResponse:
|
||||||
"""Spezialisierte Graph-Suche mit Stream-Tracing (WP-25)."""
|
"""Spezialisierte Graph-Suche mit Stream-Tracing und Edge-Boosts (WP-25)."""
|
||||||
transformed_query = cfg.get("query_template", "{query}").format(query=query)
|
transformed_query = cfg.get("query_template", "{query}").format(query=query)
|
||||||
|
|
||||||
request = QueryRequest(
|
request = QueryRequest(
|
||||||
|
|
@ -177,7 +188,7 @@ class DecisionEngine:
|
||||||
top_k=cfg.get("top_k", 5),
|
top_k=cfg.get("top_k", 5),
|
||||||
filters={"type": cfg.get("filter_types", [])},
|
filters={"type": cfg.get("filter_types", [])},
|
||||||
expand={"depth": 1},
|
expand={"depth": 1},
|
||||||
boost_edges=cfg.get("edge_boosts", {}),
|
boost_edges=cfg.get("edge_boosts", {}), # WP-25a Erhalt
|
||||||
explain=True
|
explain=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -204,19 +215,23 @@ class DecisionEngine:
|
||||||
query: str,
|
query: str,
|
||||||
stream_results: Dict[str, str]
|
stream_results: Dict[str, str]
|
||||||
) -> str:
|
) -> str:
|
||||||
"""WP-25b: Finale Synthese via Lazy-Prompt 'rag_template'."""
|
"""WP-25b: Finale Synthese via Lazy-Prompt Orchestrierung."""
|
||||||
profile = strategy.get("llm_profile")
|
profile = strategy.get("llm_profile")
|
||||||
template_key = strategy.get("prompt_template", "rag_template")
|
# Nutzt den Key aus der YAML oder 'fact_synthesis_v1' als sicheren Default
|
||||||
|
template_key = strategy.get("prompt_template", "fact_synthesis_v1")
|
||||||
system_prompt = self.llm_service.get_prompt("system_prompt")
|
system_prompt = self.llm_service.get_prompt("system_prompt")
|
||||||
|
|
||||||
# WP-25 ROBUSTNESS: Pre-Initialization
|
# WP-25 ROBUSTNESS: Pre-Initialization der Variablen
|
||||||
all_possible_streams = ["values_stream", "facts_stream", "biography_stream", "risk_stream", "tech_stream"]
|
all_possible_streams = ["values_stream", "facts_stream", "biography_stream", "risk_stream", "tech_stream"]
|
||||||
template_vars = {s: "" for s in all_possible_streams}
|
template_vars = {s: "" for s in all_possible_streams}
|
||||||
template_vars.update(stream_results)
|
template_vars.update(stream_results)
|
||||||
template_vars["query"] = query
|
template_vars["query"] = query
|
||||||
|
|
||||||
# WP-25b: Wir reichen die Variablen direkt an den Service weiter.
|
# WP-25a Erhalt: Optionale Prepend-Anweisung
|
||||||
# Formatierung erfolgt erst nach Profil-Auflösung (Gemini vs. Llama vs. Phi3).
|
template_vars["prepend_instruction"] = strategy.get("prepend_instruction", "")
|
||||||
|
|
||||||
|
# WP-25b: Delegation der Synthese an den LLMService.
|
||||||
|
# Formatierung erfolgt erst nach Profil-Auflösung (Gemini vs. Llama vs. Qwen).
|
||||||
try:
|
try:
|
||||||
return await self.llm_service.generate_raw_response(
|
return await self.llm_service.generate_raw_response(
|
||||||
prompt_key=template_key,
|
prompt_key=template_key,
|
||||||
|
|
|
||||||
|
|
@ -108,7 +108,7 @@ class LLMService:
|
||||||
if not isinstance(data, dict):
|
if not isinstance(data, dict):
|
||||||
return str(data)
|
return str(data)
|
||||||
|
|
||||||
# 1. Spezifischstes Match: Exakte Modell-ID (z.B. 'meta-llama/llama-3.3-70b-instruct:free')
|
# 1. Spezifischstes Match: Exakte Modell-ID (z.B. 'google/gemini-2.0-flash-exp:free')
|
||||||
if model_id and model_id in data:
|
if model_id and model_id in data:
|
||||||
return str(data[model_id])
|
return str(data[model_id])
|
||||||
|
|
||||||
|
|
@ -166,6 +166,7 @@ class LLMService:
|
||||||
if prompt_key:
|
if prompt_key:
|
||||||
template = self.get_prompt(prompt_key, model_id=target_model, provider=target_provider)
|
template = self.get_prompt(prompt_key, model_id=target_model, provider=target_provider)
|
||||||
try:
|
try:
|
||||||
|
# Formatierung mit den übergebenen Variablen
|
||||||
current_prompt = template.format(**(variables or {}))
|
current_prompt = template.format(**(variables or {}))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"❌ Prompt formatting failed for key '{prompt_key}': {e}")
|
logger.error(f"❌ Prompt formatting failed for key '{prompt_key}': {e}")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user