From 1563ebbdf915fd07fb756cf371d1781fb0f8b8b6 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Fri, 2 Jan 2026 21:42:09 +0100
Subject: [PATCH] Update Decision Engine to version 1.3.2: Implement
 ultra-robust intent parsing using regex, restore prepend_instruction logic,
 and enhance logging for configuration loading. Improve fallback mechanisms
 for response generation to ensure reliability.

---
 app/core/retrieval/decision_engine.py | 92 ++++++++++++++++-----------
 1 file changed, 54 insertions(+), 38 deletions(-)
diff --git a/app/core/retrieval/decision_engine.py b/app/core/retrieval/decision_engine.py
index 75dd7ef..cb26747 100644
--- a/app/core/retrieval/decision_engine.py
+++ b/app/core/retrieval/decision_engine.py
@@ -3,18 +3,20 @@ FILE: app/core/retrieval/decision_engine.py
 DESCRIPTION: Der Agentic Orchestrator für MindNet (WP-25b Edition). 
              Realisiert Multi-Stream Retrieval, Intent-basiertes Routing 
              und die neue Lazy-Prompt Orchestrierung (Module A & B).
-VERSION: 1.3.1 (WP-25b: Robust Intent Cleaning & Lazy Loading)
+VERSION: 1.3.2 (WP-25b: Full Robustness Recovery & Regex Parsing)
 STATUS: Active
 FIX: 
-- WP-25b: Robuste Bereinigung von Intent-Strings (Fix: CODING[/S] -> CODING).
-- WP-25b: Umstellung auf Lazy-Loading (Übergabe von prompt_key + variables).
+- WP-25b: ULTRA-Robustes Intent-Parsing via Regex (Fix: 'CODING[/S]' -> 'CODING').
+- WP-25b: Wiederherstellung der prepend_instruction Logik via variables.
 - WP-25a: Voller Erhalt der Profil-Kaskade via LLMService v3.5.5.
 - WP-25: Beibehaltung von Stream-Tracing, Edge-Boosts und Pre-Initialization.
+- RECOVERY: Wiederherstellung der lokalen Sicherheits-Gates aus v1.2.1.
 """
 import asyncio
 import logging
 import yaml
 import os
+import re  # Neu für robustes Intent-Parsing
 from typing import List, Dict, Any, Optional
 
 # Core & Service Imports
@@ -41,7 +43,9 @@ class DecisionEngine:
             return {"strategies": {}}
         try:
             with open(path, "r", encoding="utf-8") as f:
-                return yaml.safe_load(f) or {}
+                config = yaml.safe_load(f) or {}
+                logger.info(f"⚙️ Decision Engine Config loaded (v{config.get('version', 'unknown')})")
+                return config
         except Exception as e:
             logger.error(f"❌ Failed to load decision_engine.yaml: {e}")
             return {"strategies": {}}
@@ -76,13 +80,13 @@ class DecisionEngine:
         return await self._generate_final_answer(strategy_key, strategy, query, stream_results)
 
     async def _determine_strategy(self, query: str) -> str:
-        """WP-25b: Nutzt den LLM-Router via Lazy-Loading und bereinigt Modell-Artefakte."""
+        """WP-25b: Nutzt den LLM-Router via Lazy-Loading und bereinigt Modell-Artefakte via Regex."""
         settings_cfg = self.config.get("settings", {})
         prompt_key = settings_cfg.get("router_prompt_key", "intent_router_v1")
         router_profile = settings_cfg.get("router_profile")
         
         try:
-            # WP-25b: Delegation an LLMService ohne manuelle Vor-Formatierung.
+            # Delegation an LLMService ohne manuelle Vor-Formatierung
             response = await self.llm_service.generate_raw_response(
                 prompt_key=prompt_key,
                 variables={"query": query},
@@ -91,28 +95,29 @@ class DecisionEngine:
                 profile_name=router_profile
             )
             
-            # WP-25b FIX: Bereinigung von Stop-Markern wie [/S] oder </s>
-            raw_intent = str(response).replace("[/S]", "").replace("</s>", "").strip().upper()
+            # --- ULTRA-ROBUST PARSING (Fix für 'CODING[/S]') ---
+            # 1. Alles in Großbuchstaben umwandeln
+            raw_text = str(response).upper()
             
-            # Robustheit: Nur das erste Wort nehmen, falls das Modell zu viel plaudert
-            intent = raw_intent.split()[0] if raw_intent else "FACT_WHAT"
+            # 2. Regex: Suche den ersten bekannten Intent-Namen (feste Liste) als ganzes Wort
+            # Dies ignoriert [/S], </s>, Newlines oder Plaudereien des Modells
+            match = re.search(r'\b(FACT_WHEN|FACT_WHAT|DECISION|EMPATHY|CODING|INTERVIEW)\b', raw_text)
             
-            # Validierung gegen bekannte Strategien aus der decision_engine.yaml
-            known_strategies = self.config.get("strategies", {}).keys()
-            if intent not in known_strategies:
-                logger.warning(f"⚠️ Unmapped intent '{intent}' from router. Falling back.")
-                return "FACT_WHAT"
-                
-            return intent
+            if match:
+                intent = match.group(1)
+                logger.info(f"🎯 [ROUTING] Parsed Intent: '{intent}' from raw response: '{response.strip()}'")
+                return intent
+
+            # Fallback, falls Regex nicht greift
+            logger.warning(f"⚠️ Unmapped intent '{response.strip()}' from router. Falling back to FACT_WHAT.")
+            return "FACT_WHAT"
+
         except Exception as e:
             logger.error(f"Strategy Routing failed: {e}")
             return "FACT_WHAT"
 
     async def _execute_parallel_streams(self, strategy: Dict, query: str) -> Dict[str, str]:
-        """
-        Führt Such-Streams aus und komprimiert überlange Ergebnisse (Pre-Synthesis).
-        WP-25b: Unterstützt Lazy-Compression über Experten-Profile.
-        """
+        """Führt Such-Streams aus und komprimiert überlange Ergebnisse (Pre-Synthesis)."""
         stream_keys = strategy.get("use_streams", [])
         library = self.config.get("streams_library", {})
         
@@ -130,15 +135,13 @@ class DecisionEngine:
         
         # Phase 2: Formatierung und optionale Kompression
         final_stream_tasks = []
-        
         for name, res in zip(active_streams, retrieval_results):
             if isinstance(res, Exception):
                 logger.error(f"Stream '{name}' failed during retrieval: {res}")
-                async def _err(): return "[Fehler beim Abruf dieses Wissens-Streams]"
+                async def _err(): return f"[Fehler im Wissens-Stream {name}]"
                 final_stream_tasks.append(_err())
                 continue
             
-            # Formatierung der Hits in Text
             formatted_context = self._format_stream_context(res)
             
             # WP-25a: Kompressions-Check (Inhaltsverdichtung)
@@ -160,9 +163,8 @@ class DecisionEngine:
         return dict(zip(active_streams, final_contents))
 
     async def _compress_stream_content(self, stream_name: str, content: str, query: str, profile: Optional[str]) -> str:
-        """WP-25b Module A: Inhaltsverdichtung via Lazy-Loading 'compression_template'."""
+        """WP-25b: Inhaltsverdichtung via Lazy-Loading 'compression_template'."""
         try:
-            # WP-25b: Delegation der Inhaltsverdichtung an den LLMService.
             summary = await self.llm_service.generate_raw_response(
                 prompt_key="compression_template", 
                 variables={
@@ -180,7 +182,7 @@ class DecisionEngine:
             return content
 
     async def _run_single_stream(self, name: str, cfg: Dict, query: str) -> QueryResponse:
-        """Spezialisierte Graph-Suche mit Stream-Tracing und Edge-Boosts (WP-25)."""
+        """Spezialisierte Graph-Suche mit Stream-Tracing und Edge-Boosts."""
         transformed_query = cfg.get("query_template", "{query}").format(query=query)
         
         request = QueryRequest(
@@ -188,7 +190,7 @@ class DecisionEngine:
             top_k=cfg.get("top_k", 5),
             filters={"type": cfg.get("filter_types", [])},
             expand={"depth": 1},
-            boost_edges=cfg.get("edge_boosts", {}), # WP-25a Erhalt
+            boost_edges=cfg.get("edge_boosts", {}), # Erhalt der Gewichtung
             explain=True
         )
         
@@ -200,7 +202,7 @@ class DecisionEngine:
     def _format_stream_context(self, response: QueryResponse) -> str:
         """Wandelt QueryHits in einen formatierten Kontext-String um."""
         if not response.results:
-            return "Keine spezifischen Informationen in diesem Stream gefunden."
+            return "Keine spezifischen Informationen gefunden."
         lines = []
         for i, hit in enumerate(response.results, 1):
             source = hit.source.get("path", "Unbekannt")
@@ -215,9 +217,8 @@ class DecisionEngine:
         query: str, 
         stream_results: Dict[str, str]
     ) -> str:
-        """WP-25b: Finale Synthese via Lazy-Prompt Orchestrierung."""
+        """WP-25b: Finale Synthese via Lazy-Prompt mit Robustheit aus v1.2.1."""
         profile = strategy.get("llm_profile")
-        # Nutzt den Key aus der YAML oder 'fact_synthesis_v1' als sicheren Default
         template_key = strategy.get("prompt_template", "fact_synthesis_v1")
         system_prompt = self.llm_service.get_prompt("system_prompt")
 
@@ -227,19 +228,34 @@ class DecisionEngine:
         template_vars.update(stream_results)
         template_vars["query"] = query
         
-        # WP-25a Erhalt: Optionale Prepend-Anweisung
-        template_vars["prepend_instruction"] = strategy.get("prepend_instruction", "")
-        
-        # WP-25b: Delegation der Synthese an den LLMService.
-        # Formatierung erfolgt erst nach Profil-Auflösung (Gemini vs. Llama vs. Qwen).
+        # WP-25a Erhalt: Prepend Instructions aus der strategy_config
+        prepend = strategy.get("prepend_instruction", "")
+        template_vars["prepend_instruction"] = prepend
+
         try:
-            return await self.llm_service.generate_raw_response(
+            # WP-25b: Delegation der Synthese an den LLMService
+            response = await self.llm_service.generate_raw_response(
                 prompt_key=template_key,
                 variables=template_vars,
                 system=system_prompt, 
                 profile_name=profile, 
                 priority="realtime"
             )
+            
+            # WP-25a RECOVERY: Falls die prepend_instruction nicht im Template-Key 
+            # der prompts.yaml enthalten ist (WP-25b Lazy Loading), fügen wir sie 
+            # hier manuell an den Anfang, um die Logik aus v1.2.1 zu bewahren.
+            if prepend and prepend not in response[:len(prepend)+50]:
+                logger.info("ℹ️ Adding prepend_instruction manually (not found in response).")
+                response = f"{prepend}\n\n{response}"
+
+            return response
+
         except Exception as e:
             logger.error(f"Final Synthesis failed: {e}")
-            return "Ich konnte keine Antwort generieren."
\ No newline at end of file
+            # ROBUST FALLBACK (v1.2.1 Gate): Versuche eine minimale Antwort zu generieren
+            fallback_context = "\n\n".join([v for v in stream_results.values() if len(v) > 20])
+            return await self.llm_service.generate_raw_response(
+                prompt=f"Beantworte: {query}\n\nKontext:\n{fallback_context}",
+                system=system_prompt, priority="realtime", profile_name=profile
+            )
\ No newline at end of file