Update Decision Engine to version 1.3.2: Implement ultra-robust intent parsing using regex, restore prepend_instruction logic, and enhance logging for configuration loading. Improve fallback mechanisms for response generation to ensure reliability.
This commit is contained in:
parent
38fac89f73
commit
1563ebbdf9
|
|
@ -3,18 +3,20 @@ FILE: app/core/retrieval/decision_engine.py
|
||||||
DESCRIPTION: Der Agentic Orchestrator für MindNet (WP-25b Edition).
|
DESCRIPTION: Der Agentic Orchestrator für MindNet (WP-25b Edition).
|
||||||
Realisiert Multi-Stream Retrieval, Intent-basiertes Routing
|
Realisiert Multi-Stream Retrieval, Intent-basiertes Routing
|
||||||
und die neue Lazy-Prompt Orchestrierung (Module A & B).
|
und die neue Lazy-Prompt Orchestrierung (Module A & B).
|
||||||
VERSION: 1.3.1 (WP-25b: Robust Intent Cleaning & Lazy Loading)
|
VERSION: 1.3.2 (WP-25b: Full Robustness Recovery & Regex Parsing)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
FIX:
|
FIX:
|
||||||
- WP-25b: Robuste Bereinigung von Intent-Strings (Fix: CODING[/S] -> CODING).
|
- WP-25b: ULTRA-Robustes Intent-Parsing via Regex (Fix: 'CODING[/S]' -> 'CODING').
|
||||||
- WP-25b: Umstellung auf Lazy-Loading (Übergabe von prompt_key + variables).
|
- WP-25b: Wiederherstellung der prepend_instruction Logik via variables.
|
||||||
- WP-25a: Voller Erhalt der Profil-Kaskade via LLMService v3.5.5.
|
- WP-25a: Voller Erhalt der Profil-Kaskade via LLMService v3.5.5.
|
||||||
- WP-25: Beibehaltung von Stream-Tracing, Edge-Boosts und Pre-Initialization.
|
- WP-25: Beibehaltung von Stream-Tracing, Edge-Boosts und Pre-Initialization.
|
||||||
|
- RECOVERY: Wiederherstellung der lokalen Sicherheits-Gates aus v1.2.1.
|
||||||
"""
|
"""
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import yaml
|
import yaml
|
||||||
import os
|
import os
|
||||||
|
import re # Neu für robustes Intent-Parsing
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
|
|
||||||
# Core & Service Imports
|
# Core & Service Imports
|
||||||
|
|
@ -41,7 +43,9 @@ class DecisionEngine:
|
||||||
return {"strategies": {}}
|
return {"strategies": {}}
|
||||||
try:
|
try:
|
||||||
with open(path, "r", encoding="utf-8") as f:
|
with open(path, "r", encoding="utf-8") as f:
|
||||||
return yaml.safe_load(f) or {}
|
config = yaml.safe_load(f) or {}
|
||||||
|
logger.info(f"⚙️ Decision Engine Config loaded (v{config.get('version', 'unknown')})")
|
||||||
|
return config
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"❌ Failed to load decision_engine.yaml: {e}")
|
logger.error(f"❌ Failed to load decision_engine.yaml: {e}")
|
||||||
return {"strategies": {}}
|
return {"strategies": {}}
|
||||||
|
|
@ -76,13 +80,13 @@ class DecisionEngine:
|
||||||
return await self._generate_final_answer(strategy_key, strategy, query, stream_results)
|
return await self._generate_final_answer(strategy_key, strategy, query, stream_results)
|
||||||
|
|
||||||
async def _determine_strategy(self, query: str) -> str:
|
async def _determine_strategy(self, query: str) -> str:
|
||||||
"""WP-25b: Nutzt den LLM-Router via Lazy-Loading und bereinigt Modell-Artefakte."""
|
"""WP-25b: Nutzt den LLM-Router via Lazy-Loading und bereinigt Modell-Artefakte via Regex."""
|
||||||
settings_cfg = self.config.get("settings", {})
|
settings_cfg = self.config.get("settings", {})
|
||||||
prompt_key = settings_cfg.get("router_prompt_key", "intent_router_v1")
|
prompt_key = settings_cfg.get("router_prompt_key", "intent_router_v1")
|
||||||
router_profile = settings_cfg.get("router_profile")
|
router_profile = settings_cfg.get("router_profile")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# WP-25b: Delegation an LLMService ohne manuelle Vor-Formatierung.
|
# Delegation an LLMService ohne manuelle Vor-Formatierung
|
||||||
response = await self.llm_service.generate_raw_response(
|
response = await self.llm_service.generate_raw_response(
|
||||||
prompt_key=prompt_key,
|
prompt_key=prompt_key,
|
||||||
variables={"query": query},
|
variables={"query": query},
|
||||||
|
|
@ -91,28 +95,29 @@ class DecisionEngine:
|
||||||
profile_name=router_profile
|
profile_name=router_profile
|
||||||
)
|
)
|
||||||
|
|
||||||
# WP-25b FIX: Bereinigung von Stop-Markern wie [/S] oder </s>
|
# --- ULTRA-ROBUST PARSING (Fix für 'CODING[/S]') ---
|
||||||
raw_intent = str(response).replace("[/S]", "").replace("</s>", "").strip().upper()
|
# 1. Alles in Großbuchstaben umwandeln
|
||||||
|
raw_text = str(response).upper()
|
||||||
|
|
||||||
# Robustheit: Nur das erste Wort nehmen, falls das Modell zu viel plaudert
|
# 2. Regex: Suche den ersten bekannten Intent-Bezeichner (feste Whitelist) als ganzes Wort
|
||||||
intent = raw_intent.split()[0] if raw_intent else "FACT_WHAT"
|
# Dies ignoriert [/S], </s>, Newlines oder Plaudereien des Modells
|
||||||
|
match = re.search(r'\b(FACT_WHEN|FACT_WHAT|DECISION|EMPATHY|CODING|INTERVIEW)\b', raw_text)
|
||||||
|
|
||||||
# Validierung gegen bekannte Strategien aus der decision_engine.yaml
|
if match:
|
||||||
known_strategies = self.config.get("strategies", {}).keys()
|
intent = match.group(1)
|
||||||
if intent not in known_strategies:
|
logger.info(f"🎯 [ROUTING] Parsed Intent: '{intent}' from raw response: '{response.strip()}'")
|
||||||
logger.warning(f"⚠️ Unmapped intent '{intent}' from router. Falling back.")
|
return intent
|
||||||
return "FACT_WHAT"
|
|
||||||
|
# Fallback, falls Regex nicht greift
|
||||||
return intent
|
logger.warning(f"⚠️ Unmapped intent '{response.strip()}' from router. Falling back to FACT_WHAT.")
|
||||||
|
return "FACT_WHAT"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Strategy Routing failed: {e}")
|
logger.error(f"Strategy Routing failed: {e}")
|
||||||
return "FACT_WHAT"
|
return "FACT_WHAT"
|
||||||
|
|
||||||
async def _execute_parallel_streams(self, strategy: Dict, query: str) -> Dict[str, str]:
|
async def _execute_parallel_streams(self, strategy: Dict, query: str) -> Dict[str, str]:
|
||||||
"""
|
"""Führt Such-Streams aus und komprimiert überlange Ergebnisse (Pre-Synthesis)."""
|
||||||
Führt Such-Streams aus und komprimiert überlange Ergebnisse (Pre-Synthesis).
|
|
||||||
WP-25b: Unterstützt Lazy-Compression über Experten-Profile.
|
|
||||||
"""
|
|
||||||
stream_keys = strategy.get("use_streams", [])
|
stream_keys = strategy.get("use_streams", [])
|
||||||
library = self.config.get("streams_library", {})
|
library = self.config.get("streams_library", {})
|
||||||
|
|
||||||
|
|
@ -130,15 +135,13 @@ class DecisionEngine:
|
||||||
|
|
||||||
# Phase 2: Formatierung und optionale Kompression
|
# Phase 2: Formatierung und optionale Kompression
|
||||||
final_stream_tasks = []
|
final_stream_tasks = []
|
||||||
|
|
||||||
for name, res in zip(active_streams, retrieval_results):
|
for name, res in zip(active_streams, retrieval_results):
|
||||||
if isinstance(res, Exception):
|
if isinstance(res, Exception):
|
||||||
logger.error(f"Stream '{name}' failed during retrieval: {res}")
|
logger.error(f"Stream '{name}' failed during retrieval: {res}")
|
||||||
async def _err(): return "[Fehler beim Abruf dieses Wissens-Streams]"
|
async def _err(): return f"[Fehler im Wissens-Stream {name}]"
|
||||||
final_stream_tasks.append(_err())
|
final_stream_tasks.append(_err())
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Formatierung der Hits in Text
|
|
||||||
formatted_context = self._format_stream_context(res)
|
formatted_context = self._format_stream_context(res)
|
||||||
|
|
||||||
# WP-25a: Kompressions-Check (Inhaltsverdichtung)
|
# WP-25a: Kompressions-Check (Inhaltsverdichtung)
|
||||||
|
|
@ -160,9 +163,8 @@ class DecisionEngine:
|
||||||
return dict(zip(active_streams, final_contents))
|
return dict(zip(active_streams, final_contents))
|
||||||
|
|
||||||
async def _compress_stream_content(self, stream_name: str, content: str, query: str, profile: Optional[str]) -> str:
|
async def _compress_stream_content(self, stream_name: str, content: str, query: str, profile: Optional[str]) -> str:
|
||||||
"""WP-25b Module A: Inhaltsverdichtung via Lazy-Loading 'compression_template'."""
|
"""WP-25b: Inhaltsverdichtung via Lazy-Loading 'compression_template'."""
|
||||||
try:
|
try:
|
||||||
# WP-25b: Delegation der Inhaltsverdichtung an den LLMService.
|
|
||||||
summary = await self.llm_service.generate_raw_response(
|
summary = await self.llm_service.generate_raw_response(
|
||||||
prompt_key="compression_template",
|
prompt_key="compression_template",
|
||||||
variables={
|
variables={
|
||||||
|
|
@ -180,7 +182,7 @@ class DecisionEngine:
|
||||||
return content
|
return content
|
||||||
|
|
||||||
async def _run_single_stream(self, name: str, cfg: Dict, query: str) -> QueryResponse:
|
async def _run_single_stream(self, name: str, cfg: Dict, query: str) -> QueryResponse:
|
||||||
"""Spezialisierte Graph-Suche mit Stream-Tracing und Edge-Boosts (WP-25)."""
|
"""Spezialisierte Graph-Suche mit Stream-Tracing und Edge-Boosts."""
|
||||||
transformed_query = cfg.get("query_template", "{query}").format(query=query)
|
transformed_query = cfg.get("query_template", "{query}").format(query=query)
|
||||||
|
|
||||||
request = QueryRequest(
|
request = QueryRequest(
|
||||||
|
|
@ -188,7 +190,7 @@ class DecisionEngine:
|
||||||
top_k=cfg.get("top_k", 5),
|
top_k=cfg.get("top_k", 5),
|
||||||
filters={"type": cfg.get("filter_types", [])},
|
filters={"type": cfg.get("filter_types", [])},
|
||||||
expand={"depth": 1},
|
expand={"depth": 1},
|
||||||
boost_edges=cfg.get("edge_boosts", {}), # WP-25a Erhalt
|
boost_edges=cfg.get("edge_boosts", {}), # Erhalt der Gewichtung
|
||||||
explain=True
|
explain=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -200,7 +202,7 @@ class DecisionEngine:
|
||||||
def _format_stream_context(self, response: QueryResponse) -> str:
|
def _format_stream_context(self, response: QueryResponse) -> str:
|
||||||
"""Wandelt QueryHits in einen formatierten Kontext-String um."""
|
"""Wandelt QueryHits in einen formatierten Kontext-String um."""
|
||||||
if not response.results:
|
if not response.results:
|
||||||
return "Keine spezifischen Informationen in diesem Stream gefunden."
|
return "Keine spezifischen Informationen gefunden."
|
||||||
lines = []
|
lines = []
|
||||||
for i, hit in enumerate(response.results, 1):
|
for i, hit in enumerate(response.results, 1):
|
||||||
source = hit.source.get("path", "Unbekannt")
|
source = hit.source.get("path", "Unbekannt")
|
||||||
|
|
@ -215,9 +217,8 @@ class DecisionEngine:
|
||||||
query: str,
|
query: str,
|
||||||
stream_results: Dict[str, str]
|
stream_results: Dict[str, str]
|
||||||
) -> str:
|
) -> str:
|
||||||
"""WP-25b: Finale Synthese via Lazy-Prompt Orchestrierung."""
|
"""WP-25b: Finale Synthese via Lazy-Prompt mit Robustheit aus v1.2.1."""
|
||||||
profile = strategy.get("llm_profile")
|
profile = strategy.get("llm_profile")
|
||||||
# Nutzt den Key aus der YAML oder 'fact_synthesis_v1' als sicheren Default
|
|
||||||
template_key = strategy.get("prompt_template", "fact_synthesis_v1")
|
template_key = strategy.get("prompt_template", "fact_synthesis_v1")
|
||||||
system_prompt = self.llm_service.get_prompt("system_prompt")
|
system_prompt = self.llm_service.get_prompt("system_prompt")
|
||||||
|
|
||||||
|
|
@ -227,19 +228,34 @@ class DecisionEngine:
|
||||||
template_vars.update(stream_results)
|
template_vars.update(stream_results)
|
||||||
template_vars["query"] = query
|
template_vars["query"] = query
|
||||||
|
|
||||||
# WP-25a Erhalt: Optionale Prepend-Anweisung
|
# WP-25a Erhalt: Prepend Instructions aus der strategy_config
|
||||||
template_vars["prepend_instruction"] = strategy.get("prepend_instruction", "")
|
prepend = strategy.get("prepend_instruction", "")
|
||||||
|
template_vars["prepend_instruction"] = prepend
|
||||||
# WP-25b: Delegation der Synthese an den LLMService.
|
|
||||||
# Formatierung erfolgt erst nach Profil-Auflösung (Gemini vs. Llama vs. Qwen).
|
|
||||||
try:
|
try:
|
||||||
return await self.llm_service.generate_raw_response(
|
# WP-25b: Delegation der Synthese an den LLMService
|
||||||
|
response = await self.llm_service.generate_raw_response(
|
||||||
prompt_key=template_key,
|
prompt_key=template_key,
|
||||||
variables=template_vars,
|
variables=template_vars,
|
||||||
system=system_prompt,
|
system=system_prompt,
|
||||||
profile_name=profile,
|
profile_name=profile,
|
||||||
priority="realtime"
|
priority="realtime"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# WP-25a RECOVERY: Falls die prepend_instruction nicht im Template-Key
|
||||||
|
# der prompts.yaml enthalten ist (WP-25b Lazy Loading), fügen wir sie
|
||||||
|
# hier manuell an den Anfang, um die Logik aus v1.2.1 zu bewahren.
|
||||||
|
if prepend and prepend not in response[:len(prepend)+50]:
|
||||||
|
logger.info("ℹ️ Adding prepend_instruction manually (not found in response).")
|
||||||
|
response = f"{prepend}\n\n{response}"
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Final Synthesis failed: {e}")
|
logger.error(f"Final Synthesis failed: {e}")
|
||||||
return "Ich konnte keine Antwort generieren."
|
# ROBUST FALLBACK (v1.2.1 Gate): Versuche eine minimale Antwort zu generieren
|
||||||
|
fallback_context = "\n\n".join([v for v in stream_results.values() if len(v) > 20])
|
||||||
|
return await self.llm_service.generate_raw_response(
|
||||||
|
prompt=f"Beantworte: {query}\n\nKontext:\n{fallback_context}",
|
||||||
|
system=system_prompt, priority="realtime", profile_name=profile
|
||||||
|
)
|
||||||
Loading…
Reference in New Issue
Block a user