mindnet/app/core/retrieval/decision_engine.py
Lars 742792770c Implement Phase 3 Agentic Edge Validation in ingestion_processor.py and related documentation updates
Introduce a new method for persisting rejected edges for audit purposes, enhancing traceability and validation logic. Update the decision engine to utilize a generic fallback template for improved error handling during LLM validation. Revise documentation across multiple files to reflect the new versioning, context, and features related to Phase 3 validation, including automatic mirror edges and note-scope zones. This update ensures better graph integrity and validation accuracy in the ingestion process.
2026-01-12 07:45:54 +01:00

"""
FILE: app/core/retrieval/decision_engine.py
DESCRIPTION: Der Agentic Orchestrator für MindNet (WP-25b Edition).
Realisiert Multi-Stream Retrieval, Intent-basiertes Routing
und die neue Lazy-Prompt Orchestrierung (Module A & B).
VERSION: 1.3.2 (WP-25b: Full Robustness Recovery & Regex Parsing)
STATUS: Active
FIX:
- WP-25b: ULTRA-Robustes Intent-Parsing via Regex (Fix: 'CODING[/S]' -> 'CODING').
- WP-25b: Wiederherstellung der prepend_instruction Logik via variables.
- WP-25a: Voller Erhalt der Profil-Kaskade via LLMService v3.5.5.
- WP-25: Beibehaltung von Stream-Tracing, Edge-Boosts und Pre-Initialization.
- RECOVERY: Wiederherstellung der lokalen Sicherheits-Gates aus v1.2.1.
"""
import asyncio
import logging
import yaml
import os
import re  # new: needed for robust intent parsing
from typing import List, Dict, Any, Optional

# Core & service imports
from app.models.dto import QueryRequest, QueryResponse
from app.core.retrieval.retriever import Retriever
from app.services.llm_service import LLMService
from app.config import get_settings
logger = logging.getLogger(__name__)


class DecisionEngine:
    def __init__(self):
        """Initializes the engine and loads the modular configurations."""
        self.settings = get_settings()
        self.retriever = Retriever()
        self.llm_service = LLMService()
        self.config = self._load_engine_config()

    def _load_engine_config(self) -> Dict[str, Any]:
        """Loads the multi-stream configuration (WP-25/25a)."""
        path = os.getenv("MINDNET_DECISION_CONFIG", "config/decision_engine.yaml")
        if not os.path.exists(path):
            logger.error(f"❌ Decision Engine config not found at {path}")
            return {"strategies": {}, "streams_library": {}}
        try:
            with open(path, "r", encoding="utf-8") as f:
                config = yaml.safe_load(f) or {}
            # WP-25b FIX: schema validation
            required_keys = ["strategies", "streams_library"]
            missing = [k for k in required_keys if k not in config]
            if missing:
                logger.error(f"❌ Missing required keys in decision_engine.yaml: {missing}")
                return {"strategies": {}, "streams_library": {}}
            # Warn about unknown top-level keys
            known_keys = {"version", "settings", "strategies", "streams_library"}
            unknown = set(config.keys()) - known_keys
            if unknown:
                logger.warning(f"⚠️ Unknown keys in decision_engine.yaml: {unknown}")
            logger.info(f"⚙️ Decision Engine config loaded (v{config.get('version', 'unknown')})")
            return config
        except yaml.YAMLError as e:
            logger.error(f"❌ YAML syntax error in decision_engine.yaml: {e}")
            return {"strategies": {}, "streams_library": {}}
        except Exception as e:
            logger.error(f"❌ Failed to load decision_engine.yaml: {e}")
            return {"strategies": {}, "streams_library": {}}

    async def ask(self, query: str) -> str:
        """
        Main entry point of the MindNet chat.
        Orchestrates the agentic process: routing -> retrieval -> compression -> synthesis.
        """
        # 1. Intent recognition (strategy routing)
        strategy_key = await self._determine_strategy(query)
        strategies = self.config.get("strategies", {})
        strategy = strategies.get(strategy_key)
        if not strategy:
            logger.warning(f"⚠️ Unknown strategy '{strategy_key}'. Falling back to FACT_WHAT.")
            strategy_key = "FACT_WHAT"
            strategy = strategies.get("FACT_WHAT")
            if not strategy and strategies:
                # Last resort: take the first configured strategy
                strategy_key = next(iter(strategies))
                strategy = strategies[strategy_key]
        if not strategy:
            return "Entschuldigung, meine Wissensbasis ist aktuell nicht konfiguriert."
        # 2. Multi-stream retrieval & pre-synthesis (parallel tasks incl. compression)
        stream_results = await self._execute_parallel_streams(strategy, query)
        # 3. Final synthesis
        return await self._generate_final_answer(strategy_key, strategy, query, stream_results)

    async def _determine_strategy(self, query: str) -> str:
        """WP-25b: Uses the LLM router via lazy loading and strips model artifacts via regex."""
        settings_cfg = self.config.get("settings", {})
        prompt_key = settings_cfg.get("router_prompt_key", "intent_router_v1")
        router_profile = settings_cfg.get("router_profile")
        try:
            # Delegate to LLMService without manual pre-formatting
            response = await self.llm_service.generate_raw_response(
                prompt_key=prompt_key,
                variables={"query": query},
                max_retries=1,
                priority="realtime",
                profile_name=router_profile
            )
            # --- ULTRA-ROBUST PARSING (fix for 'CODING[/S]') ---
            # 1. Uppercase everything
            raw_text = str(response).upper()
            # 2. Regex: find the first token that matches a known intent.
            #    This ignores [/S], </s>, newlines, and model chatter.
            match = re.search(r'\b(FACT_WHEN|FACT_WHAT|DECISION|EMPATHY|CODING|INTERVIEW)\b', raw_text)
            if match:
                intent = match.group(1)
                logger.info(f"🎯 [ROUTING] Parsed intent: '{intent}' from raw response: '{response.strip()}'")
                return intent
            # Fallback if the regex does not match
            logger.warning(f"⚠️ Unmapped intent '{response.strip()}' from router. Falling back to FACT_WHAT.")
            return "FACT_WHAT"
        except Exception as e:
            logger.error(f"Strategy routing failed: {e}")
            return "FACT_WHAT"

    async def _execute_parallel_streams(self, strategy: Dict, query: str) -> Dict[str, str]:
        """Runs the search streams and compresses oversized results (pre-synthesis)."""
        stream_keys = strategy.get("use_streams", [])
        library = self.config.get("streams_library", {})
        # Phase 1: start the retrieval tasks
        retrieval_tasks = []
        active_streams = []
        for key in stream_keys:
            stream_cfg = library.get(key)
            if stream_cfg:
                active_streams.append(key)
                retrieval_tasks.append(self._run_single_stream(key, stream_cfg, query))
        # Collect the results
        retrieval_results = await asyncio.gather(*retrieval_tasks, return_exceptions=True)
        # Phase 2: formatting and optional compression
        # WP-24c v4.5.5: context reuse - make sure formatted_context survives compression errors
        final_stream_tasks = []
        formatted_contexts = {}  # WP-24c v4.5.5: persisted for fallback access
        for name, res in zip(active_streams, retrieval_results):
            if isinstance(res, Exception):
                logger.error(f"Stream '{name}' failed during retrieval: {res}")
                error_msg = f"[Fehler im Wissens-Stream {name}]"
                formatted_contexts[name] = error_msg
                # Bind the message as a default argument so each coroutine
                # captures its own value instead of the shared loop variable
                async def _err(msg=error_msg): return msg
                final_stream_tasks.append(_err())
                continue
            formatted_context = self._format_stream_context(res)
            formatted_contexts[name] = formatted_context  # WP-24c v4.5.5: persisted for fallback
            # WP-25a: compression check (content condensation)
            stream_cfg = library.get(name, {})
            threshold = stream_cfg.get("compression_threshold", 4000)
            if len(formatted_context) > threshold:
                logger.info(f"⚙️ [WP-25b] Triggering lazy compression for stream '{name}'...")
                comp_profile = stream_cfg.get("compression_profile")
                # WP-24c v4.5.5: compression with context reuse - on error, formatted_context is returned
                final_stream_tasks.append(
                    self._compress_stream_content(name, formatted_context, query, comp_profile)
                )
            else:
                async def _direct(c=formatted_context): return c
                final_stream_tasks.append(_direct())
        # Finish the final contents in parallel
        # WP-24c v4.5.5: on compression errors the original content is returned (see _compress_stream_content)
        final_contents = await asyncio.gather(*final_stream_tasks, return_exceptions=True)
        # WP-24c v4.5.5: exception handling for final contents - use the original content on errors
        final_results = {}
        for name, content in zip(active_streams, final_contents):
            if isinstance(content, Exception):
                logger.warning(f"⚠️ [CONTEXT-REUSE] Stream '{name}' failed in final processing: {content}. Using original context.")
                final_results[name] = formatted_contexts.get(name, f"[Fehler im Stream {name}]")
            else:
                final_results[name] = content
        logger.debug(f"📊 [STREAMS] Final stream results: {[(k, len(v)) for k, v in final_results.items()]}")
        return final_results
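
    # Illustrative return value of _execute_parallel_streams (stream names as
    # configured in use_streams; the contents shown here are hypothetical):
    #   {
    #       "facts_stream": "[1] QUELLE: ...\nINHALT: ...",
    #       "values_stream": "Keine spezifischen Informationen gefunden.",
    #   }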

    async def _compress_stream_content(self, stream_name: str, content: str, query: str, profile: Optional[str]) -> str:
        """
        WP-25b: Content condensation via the lazy-loaded 'compression_template'.
        WP-24c v4.5.5: Context reuse - on errors the original content is
        returned to avoid re-retrieval.
        """
        try:
            # WP-24c v4.5.5: logging for the LLM trace in compression mode
            logger.debug(f"🔧 [COMPRESSION] Starting compression for stream '{stream_name}' (content length: {len(content)})")
            summary = await self.llm_service.generate_raw_response(
                prompt_key="compression_template",
                variables={
                    "stream_name": stream_name,
                    "content": content,
                    "query": query
                },
                profile_name=profile,
                priority="background",
                max_retries=1
            )
            # WP-24c v4.5.5: validate the compression result
            if summary and len(summary.strip()) > 10:
                logger.debug(f"✅ [COMPRESSION] Compression succeeded for '{stream_name}' (original: {len(content)}, compressed: {len(summary)})")
                return summary.strip()
            else:
                logger.warning(f"⚠️ [COMPRESSION] Compression result too short for '{stream_name}', using original content")
                return content
        except Exception as e:
            # WP-24c v4.5.5: context reuse - return the original content on errors (no re-retrieval)
            logger.error(f"❌ [COMPRESSION] Compression of '{stream_name}' failed: {e}")
            logger.info(f"🔄 [CONTEXT-REUSE] Using original content for '{stream_name}' (length: {len(content)}) - NO re-retrieval")
            return content

    async def _run_single_stream(self, name: str, cfg: Dict, query: str) -> QueryResponse:
        """Specialized graph search with stream tracing and edge boosts."""
        transformed_query = cfg.get("query_template", "{query}").format(query=query)
        request = QueryRequest(
            query=transformed_query,
            top_k=cfg.get("top_k", 5),
            filters={"type": cfg.get("filter_types", [])},
            expand={"depth": 1},
            boost_edges=cfg.get("edge_boosts", {}),  # preserve the weighting
            explain=True
        )
        # WP-24c v4.5.0-DEBUG: retrieval tracer - log before the search
        logger.info(f"🔍 [RETRIEVAL] Starting stream: '{name}'")
        logger.info(f"   -> Transformed query: '{transformed_query}'")
        logger.debug(f"   ⚙️ [FILTER] Applied metadata filters: {request.filters}")
        logger.debug(f"   ⚙️ [FILTER] Top-K: {request.top_k}, Expand depth: {request.expand.get('depth') if request.expand else None}")
        response = await self.retriever.search(request)
        # WP-24c v4.5.0-DEBUG: retrieval tracer - log after the search
        if not response.results:
            logger.warning(f"⚠️ [EMPTY] Stream '{name}' returned 0 results.")
        else:
            logger.info(f"✨ [SUCCESS] Stream '{name}' returned {len(response.results)} hits.")
            # Log the top 3 hits at DEBUG level
            # WP-24c v4.5.4: QueryHit has no chunk_id field - use node_id (holds the chunk ID)
            for i, hit in enumerate(response.results[:3]):
                chunk_id = hit.node_id  # node_id is the chunk ID (pid)
                score = hit.total_score  # QueryHit has total_score, not score
                logger.debug(f"   [{i+1}] Chunk: {chunk_id} | Score: {score:.4f} | Path: {hit.source.get('path', 'N/A') if hit.source else 'N/A'}")
        for hit in response.results:
            hit.stream_origin = name
        return response

    def _format_stream_context(self, response: QueryResponse) -> str:
        """Converts QueryHits into a formatted context string.

        The labels and fallback texts stay German on purpose: they are fed
        into the (German) synthesis prompts and user-facing answers.
        """
        if not response.results:
            return "Keine spezifischen Informationen gefunden."
        lines = []
        for i, hit in enumerate(response.results, 1):
            source = hit.source.get("path", "Unbekannt")
            content = hit.source.get("text", "").strip()
            lines.append(f"[{i}] QUELLE: {source}\nINHALT: {content}")
        return "\n\n".join(lines)

    async def _generate_final_answer(
        self,
        strategy_key: str,
        strategy: Dict,
        query: str,
        stream_results: Dict[str, str]
    ) -> str:
        """WP-25b: Final synthesis via lazy prompt, with the robustness of v1.2.1."""
        profile = strategy.get("llm_profile")
        template_key = strategy.get("prompt_template", "fact_synthesis_v1")
        system_prompt = self.llm_service.get_prompt("system_prompt")
        # WP-25 ROBUSTNESS: pre-initialize all stream variables so templates
        # referencing unused streams render as empty instead of failing
        all_possible_streams = ["values_stream", "facts_stream", "biography_stream", "risk_stream", "tech_stream"]
        template_vars = {s: "" for s in all_possible_streams}
        template_vars.update(stream_results)
        template_vars["query"] = query
        # WP-25a: keep the prepend instructions from the strategy config
        prepend = strategy.get("prepend_instruction", "")
        template_vars["prepend_instruction"] = prepend
        try:
            # WP-25b: delegate the synthesis to the LLMService
            response = await self.llm_service.generate_raw_response(
                prompt_key=template_key,
                variables=template_vars,
                system=system_prompt,
                profile_name=profile,
                priority="realtime"
            )
            # WP-25a RECOVERY: if the prepend_instruction is not part of the
            # template key in prompts.yaml (WP-25b lazy loading), prepend it
            # manually here to preserve the v1.2.1 logic.
            if prepend and prepend not in response[:len(prepend) + 50]:
                logger.info("   Adding prepend_instruction manually (not found in response).")
                response = f"{prepend}\n\n{response}"
            return response
        except Exception as e:
            logger.error(f"Final synthesis failed: {e}")
            # WP-24c v4.5.5: ROBUST FALLBACK with context reuse
            # IMPORTANT: stream_results are reused - NO re-retrieval
            logger.info("🔄 [FALLBACK] Reusing existing stream_results (NO re-retrieval)")
            logger.debug(f"   -> Available streams: {list(stream_results.keys())}")
            logger.debug(f"   -> Stream lengths: {[(k, len(v)) for k, v in stream_results.items()]}")
            # WP-24c v4.5.5: context reuse - build the fallback context from the existing stream_results
            fallback_context = "\n\n".join([v for v in stream_results.values() if len(v) > 20])
            if not fallback_context or len(fallback_context.strip()) < 20:
                logger.warning(f"⚠️ [FALLBACK] Fallback context too short ({len(fallback_context)} chars). Stream results may be empty.")
                return f"Entschuldigung, ich konnte keine relevanten Informationen zu Ihrer Anfrage finden. (Fehler: {str(e)})"
            try:
                # WP-24c v4.5.5: fallback synthesis with LLM trace logging
                logger.info(f"🔄 [FALLBACK] Starting fallback synthesis with the existing context (length: {len(fallback_context)})")
                logger.debug(f"   -> Fallback profile: {profile}, template: fallback_synthesis")
                result = await self.llm_service.generate_raw_response(
                    prompt_key="fallback_synthesis",
                    variables={"query": query, "context": fallback_context},
                    system=system_prompt, priority="realtime", profile_name=profile
                )
                logger.info(f"✅ [FALLBACK] Fallback synthesis succeeded (answer length: {len(result) if result else 0})")
                return result
            except (ValueError, KeyError) as template_error:
                # WP-24c v4.5.9: fall back to the generic template with variables.
                # Uses the WP-25b lazy loading for model-specific fallback prompts.
                logger.warning(f"⚠️ [FALLBACK] Template 'fallback_synthesis' not found: {template_error}. Trying the generic template.")
                logger.debug(f"   -> Fallback profile: {profile}, context length: {len(fallback_context)}")
                try:
                    # WP-24c v4.5.9: try the generic template with variables (lazy loading)
                    result = await self.llm_service.generate_raw_response(
                        prompt_key="fallback_synthesis_generic",  # fallback template
                        variables={"query": query, "context": fallback_context},
                        system=system_prompt, priority="realtime", profile_name=profile
                    )
                    logger.info(f"✅ [FALLBACK] Generic template succeeded (answer length: {len(result) if result else 0})")
                    return result
                except (ValueError, KeyError) as fallback_error:
                    # WP-24c v4.5.9: last resort - a direct prompt (only if both templates are missing)
                    logger.error(f"❌ [FALLBACK] Generic template not found either: {fallback_error}. Using a direct prompt as the last fallback.")
                    result = await self.llm_service.generate_raw_response(
                        prompt=f"Beantworte: {query}\n\nKontext:\n{fallback_context}",
                        system=system_prompt, priority="realtime", profile_name=profile
                    )
                    logger.info(f"✅ [FALLBACK] Direct prompt succeeded (answer length: {len(result) if result else 0})")
                    return result
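

# --- Usage sketch (illustrative, not part of the module) ---
# A minimal way to exercise the engine from a script, assuming the YAML
# config and the backing services (Retriever, LLMService) are available
# in the environment:
#
#   if __name__ == "__main__":
#       engine = DecisionEngine()
#       answer = asyncio.run(engine.ask("What do you know about Phase 3?"))
#       print(answer)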