Update Decision Engine for WP-25b: Enhance intent processing with robust intent cleaning and lazy loading. Improve strategy determination by validating against known strategies and streamline response generation. Bump version to 1.3.1 to reflect these optimizations.
This commit is contained in:
parent
7026fc4fed
commit
38fac89f73
|
|
@ -3,13 +3,13 @@ FILE: app/core/retrieval/decision_engine.py
|
|||
DESCRIPTION: Der Agentic Orchestrator für MindNet (WP-25b Edition).
|
||||
Realisiert Multi-Stream Retrieval, Intent-basiertes Routing
|
||||
und die neue Lazy-Prompt Orchestrierung (Module A & B).
|
||||
VERSION: 1.3.0 (WP-25b: Lazy Prompt Orchestration)
|
||||
VERSION: 1.3.1 (WP-25b: Robust Intent Cleaning & Lazy Loading)
|
||||
STATUS: Active
|
||||
FIX:
|
||||
- WP-25b: Robuste Bereinigung von Intent-Strings (Fix: CODING[/S] -> CODING).
|
||||
- WP-25b: Umstellung auf Lazy-Loading (Übergabe von prompt_key + variables).
|
||||
- WP-25b: Entfernung lokaler String-Formatierung zur Ermöglichung modell-spezifischer Prompts.
|
||||
- WP-25a: Volle Integration der Profil-Kaskade via LLMService v3.5.5.
|
||||
- WP-25: Beibehaltung von Stream-Tracing und Pre-Initialization Robustness.
|
||||
- WP-25a: Voller Erhalt der Profil-Kaskade via LLMService v3.5.5.
|
||||
- WP-25: Beibehaltung von Stream-Tracing, Edge-Boosts und Pre-Initialization.
|
||||
"""
|
||||
import asyncio
|
||||
import logging
|
||||
|
|
@ -76,14 +76,13 @@ class DecisionEngine:
|
|||
return await self._generate_final_answer(strategy_key, strategy, query, stream_results)
|
||||
|
||||
async def _determine_strategy(self, query: str) -> str:
|
||||
"""WP-25b: Nutzt den LLM-Router via Lazy-Loading prompt_key."""
|
||||
"""WP-25b: Nutzt den LLM-Router via Lazy-Loading und bereinigt Modell-Artefakte."""
|
||||
settings_cfg = self.config.get("settings", {})
|
||||
prompt_key = settings_cfg.get("router_prompt_key", "intent_router_v1")
|
||||
router_profile = settings_cfg.get("router_profile")
|
||||
|
||||
try:
|
||||
# WP-25b: Keine manuelle Formatierung mehr. Wir übergeben nur Key und Variablen.
|
||||
# Der LLMService wählt den passenden Prompt für das router_profile Modell.
|
||||
# WP-25b: Delegation an LLMService ohne manuelle Vor-Formatierung.
|
||||
response = await self.llm_service.generate_raw_response(
|
||||
prompt_key=prompt_key,
|
||||
variables={"query": query},
|
||||
|
|
@ -91,7 +90,20 @@ class DecisionEngine:
|
|||
priority="realtime",
|
||||
profile_name=router_profile
|
||||
)
|
||||
return str(response).strip().upper()
|
||||
|
||||
# WP-25b FIX: Bereinigung von Stop-Markern wie [/S] oder </s>
|
||||
raw_intent = str(response).replace("[/S]", "").replace("</s>", "").strip().upper()
|
||||
|
||||
# Robustheit: Nur das erste Wort nehmen, falls das Modell zu viel plaudert
|
||||
intent = raw_intent.split()[0] if raw_intent else "FACT_WHAT"
|
||||
|
||||
# Validierung gegen bekannte Strategien aus der decision_engine.yaml
|
||||
known_strategies = self.config.get("strategies", {}).keys()
|
||||
if intent not in known_strategies:
|
||||
logger.warning(f"⚠️ Unmapped intent '{intent}' from router. Falling back.")
|
||||
return "FACT_WHAT"
|
||||
|
||||
return intent
|
||||
except Exception as e:
|
||||
logger.error(f"Strategy Routing failed: {e}")
|
||||
return "FACT_WHAT"
|
||||
|
|
@ -150,8 +162,7 @@ class DecisionEngine:
|
|||
async def _compress_stream_content(self, stream_name: str, content: str, query: str, profile: Optional[str]) -> str:
|
||||
"""WP-25b Module A: Inhaltsverdichtung via Lazy-Loading 'compression_template'."""
|
||||
try:
|
||||
# WP-25b: Wir übergeben den Auftrag an den LLMService.
|
||||
# Das Modell-spezifische Template wird erst beim Call aufgelöst.
|
||||
# WP-25b: Delegation der Inhaltsverdichtung an den LLMService.
|
||||
summary = await self.llm_service.generate_raw_response(
|
||||
prompt_key="compression_template",
|
||||
variables={
|
||||
|
|
@ -169,7 +180,7 @@ class DecisionEngine:
|
|||
return content
|
||||
|
||||
async def _run_single_stream(self, name: str, cfg: Dict, query: str) -> QueryResponse:
|
||||
"""Spezialisierte Graph-Suche mit Stream-Tracing (WP-25)."""
|
||||
"""Spezialisierte Graph-Suche mit Stream-Tracing und Edge-Boosts (WP-25)."""
|
||||
transformed_query = cfg.get("query_template", "{query}").format(query=query)
|
||||
|
||||
request = QueryRequest(
|
||||
|
|
@ -177,7 +188,7 @@ class DecisionEngine:
|
|||
top_k=cfg.get("top_k", 5),
|
||||
filters={"type": cfg.get("filter_types", [])},
|
||||
expand={"depth": 1},
|
||||
boost_edges=cfg.get("edge_boosts", {}),
|
||||
boost_edges=cfg.get("edge_boosts", {}), # WP-25a Erhalt
|
||||
explain=True
|
||||
)
|
||||
|
||||
|
|
@ -204,19 +215,23 @@ class DecisionEngine:
|
|||
query: str,
|
||||
stream_results: Dict[str, str]
|
||||
) -> str:
|
||||
"""WP-25b: Finale Synthese via Lazy-Prompt 'rag_template'."""
|
||||
"""WP-25b: Finale Synthese via Lazy-Prompt Orchestrierung."""
|
||||
profile = strategy.get("llm_profile")
|
||||
template_key = strategy.get("prompt_template", "rag_template")
|
||||
# Nutzt den Key aus der YAML oder 'fact_synthesis_v1' als sicheren Default
|
||||
template_key = strategy.get("prompt_template", "fact_synthesis_v1")
|
||||
system_prompt = self.llm_service.get_prompt("system_prompt")
|
||||
|
||||
# WP-25 ROBUSTNESS: Pre-Initialization
|
||||
# WP-25 ROBUSTNESS: Pre-Initialization der Variablen
|
||||
all_possible_streams = ["values_stream", "facts_stream", "biography_stream", "risk_stream", "tech_stream"]
|
||||
template_vars = {s: "" for s in all_possible_streams}
|
||||
template_vars.update(stream_results)
|
||||
template_vars["query"] = query
|
||||
|
||||
# WP-25b: Wir reichen die Variablen direkt an den Service weiter.
|
||||
# Formatierung erfolgt erst nach Profil-Auflösung (Gemini vs. Llama vs. Phi3).
|
||||
# WP-25a Erhalt: Optionale Prepend-Anweisung
|
||||
template_vars["prepend_instruction"] = strategy.get("prepend_instruction", "")
|
||||
|
||||
# WP-25b: Delegation der Synthese an den LLMService.
|
||||
# Formatierung erfolgt erst nach Profil-Auflösung (Gemini vs. Llama vs. Qwen).
|
||||
try:
|
||||
return await self.llm_service.generate_raw_response(
|
||||
prompt_key=template_key,
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ class LLMService:
|
|||
if not isinstance(data, dict):
|
||||
return str(data)
|
||||
|
||||
# 1. Spezifischstes Match: Exakte Modell-ID (z.B. 'meta-llama/llama-3.3-70b-instruct:free')
|
||||
# 1. Spezifischstes Match: Exakte Modell-ID (z.B. 'google/gemini-2.0-flash-exp:free')
|
||||
if model_id and model_id in data:
|
||||
return str(data[model_id])
|
||||
|
||||
|
|
@ -166,6 +166,7 @@ class LLMService:
|
|||
if prompt_key:
|
||||
template = self.get_prompt(prompt_key, model_id=target_model, provider=target_provider)
|
||||
try:
|
||||
# Formatierung mit den übergebenen Variablen
|
||||
current_prompt = template.format(**(variables or {}))
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Prompt formatting failed for key '{prompt_key}': {e}")
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user