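Update Decision Engine for WP-25b: Make intent processing more robust by cleaning the router output (stripping stop markers such as `[/S]` and `</s>` and keeping only the first token) and by using lazy prompt loading. Improve strategy determination by validating the intent against the known strategies, falling back to `FACT_WHAT` when it is unknown, and streamline response generation. Bump version to 1.3.1 to reflect these optimizations.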

2026-01-02 21:35:02 +01:00 · 2026-01-02 21:35:02 +01:00 · 38fac89f73
commit 38fac89f73
parent 7026fc4fed
2 changed files with 34 additions and 18 deletions
--- a/app/core/retrieval/decision_engine.py
+++ b/app/core/retrieval/decision_engine.py
@ -3,13 +3,13 @@ FILE: app/core/retrieval/decision_engine.py
 DESCRIPTION: Der Agentic Orchestrator für MindNet (WP-25b Edition). 
             Realisiert Multi-Stream Retrieval, Intent-basiertes Routing 
             und die neue Lazy-Prompt Orchestrierung (Module A & B).
-VERSION: 1.3.0 (WP-25b: Lazy Prompt Orchestration)
+VERSION: 1.3.1 (WP-25b: Robust Intent Cleaning & Lazy Loading)
 STATUS: Active
 FIX: 
 - WP-25b: Robuste Bereinigung von Intent-Strings (Fix: CODING[/S] -> CODING).
 - WP-25b: Umstellung auf Lazy-Loading (Übergabe von prompt_key + variables).
- WP-25b: Entfernung lokaler String-Formatierung zur Ermöglichung modell-spezifischer Prompts.
+- WP-25a: Voller Erhalt der Profil-Kaskade via LLMService v3.5.5.
- WP-25a: Volle Integration der Profil-Kaskade via LLMService v3.5.5.
+- WP-25: Beibehaltung von Stream-Tracing, Edge-Boosts und Pre-Initialization.
 - WP-25: Beibehaltung von Stream-Tracing und Pre-Initialization Robustness.
 """
 import asyncio
 import logging
@ -76,14 +76,13 @@ class DecisionEngine:
        return await self._generate_final_answer(strategy_key, strategy, query, stream_results)
    async def _determine_strategy(self, query: str) -> str:
-        """WP-25b: Nutzt den LLM-Router via Lazy-Loading prompt_key."""
+        """WP-25b: Nutzt den LLM-Router via Lazy-Loading und bereinigt Modell-Artefakte."""
        settings_cfg = self.config.get("settings", {})
        prompt_key = settings_cfg.get("router_prompt_key", "intent_router_v1")
        router_profile = settings_cfg.get("router_profile")
        try:
-            # WP-25b: Keine manuelle Formatierung mehr. Wir übergeben nur Key und Variablen.
+            # WP-25b: Delegation an LLMService ohne manuelle Vor-Formatierung.
            # Der LLMService wählt den passenden Prompt für das router_profile Modell.
            response = await self.llm_service.generate_raw_response(
                prompt_key=prompt_key,
                variables={"query": query},
@ -91,7 +90,20 @@ class DecisionEngine:
                priority="realtime", 
                profile_name=router_profile
            )
-            return str(response).strip().upper()
+            
            # WP-25b FIX: Bereinigung von Stop-Markern wie [/S] oder </s>
            raw_intent = str(response).replace("[/S]", "").replace("</s>", "").strip().upper()
            # Robustheit: Nur das erste Wort nehmen, falls das Modell zu viel plaudert
            intent = raw_intent.split()[0] if raw_intent else "FACT_WHAT"
            # Validierung gegen bekannte Strategien aus der decision_engine.yaml
            known_strategies = self.config.get("strategies", {}).keys()
            if intent not in known_strategies:
                logger.warning(f"⚠️ Unmapped intent '{intent}' from router. Falling back.")
                return "FACT_WHAT"
            return intent
        except Exception as e:
            logger.error(f"Strategy Routing failed: {e}")
            return "FACT_WHAT"
@ -150,8 +162,7 @@ class DecisionEngine:
    async def _compress_stream_content(self, stream_name: str, content: str, query: str, profile: Optional[str]) -> str:
        """WP-25b Module A: Inhaltsverdichtung via Lazy-Loading 'compression_template'."""
        try:
-            # WP-25b: Wir übergeben den Auftrag an den LLMService.
+            # WP-25b: Delegation der Inhaltsverdichtung an den LLMService.
            # Das Modell-spezifische Template wird erst beim Call aufgelöst.
            summary = await self.llm_service.generate_raw_response(
                prompt_key="compression_template", 
                variables={
@ -169,7 +180,7 @@ class DecisionEngine:
            return content
    async def _run_single_stream(self, name: str, cfg: Dict, query: str) -> QueryResponse:
-        """Spezialisierte Graph-Suche mit Stream-Tracing (WP-25)."""
+        """Spezialisierte Graph-Suche mit Stream-Tracing und Edge-Boosts (WP-25)."""
        transformed_query = cfg.get("query_template", "{query}").format(query=query)
        request = QueryRequest(
@ -177,7 +188,7 @@ class DecisionEngine:
            top_k=cfg.get("top_k", 5),
            filters={"type": cfg.get("filter_types", [])},
            expand={"depth": 1},
-            boost_edges=cfg.get("edge_boosts", {}),
+            boost_edges=cfg.get("edge_boosts", {}), # WP-25a Erhalt
            explain=True
        )
@ -204,19 +215,23 @@ class DecisionEngine:
        query: str, 
        stream_results: Dict[str, str]
    ) -> str:
-        """WP-25b: Finale Synthese via Lazy-Prompt 'rag_template'."""
+        """WP-25b: Finale Synthese via Lazy-Prompt Orchestrierung."""
        profile = strategy.get("llm_profile")
-        template_key = strategy.get("prompt_template", "rag_template")
+        # Nutzt den Key aus der YAML oder 'fact_synthesis_v1' als sicheren Default
        template_key = strategy.get("prompt_template", "fact_synthesis_v1")
        system_prompt = self.llm_service.get_prompt("system_prompt")
-        # WP-25 ROBUSTNESS: Pre-Initialization
+        # WP-25 ROBUSTNESS: Pre-Initialization der Variablen
        all_possible_streams = ["values_stream", "facts_stream", "biography_stream", "risk_stream", "tech_stream"]
        template_vars = {s: "" for s in all_possible_streams}
        template_vars.update(stream_results)
        template_vars["query"] = query
-        # WP-25b: Wir reichen die Variablen direkt an den Service weiter.
+        # WP-25a Erhalt: Optionale Prepend-Anweisung
-        # Formatierung erfolgt erst nach Profil-Auflösung (Gemini vs. Llama vs. Phi3).
+        template_vars["prepend_instruction"] = strategy.get("prepend_instruction", "")
        # WP-25b: Delegation der Synthese an den LLMService.
        # Formatierung erfolgt erst nach Profil-Auflösung (Gemini vs. Llama vs. Qwen).
        try:
            return await self.llm_service.generate_raw_response(
                prompt_key=template_key,
--- a/app/services/llm_service.py
+++ b/app/services/llm_service.py
@ -108,7 +108,7 @@ class LLMService:
        if not isinstance(data, dict):
            return str(data)
-        # 1. Spezifischstes Match: Exakte Modell-ID (z.B. 'meta-llama/llama-3.3-70b-instruct:free')
+        # 1. Spezifischstes Match: Exakte Modell-ID (z.B. 'google/gemini-2.0-flash-exp:free')
        if model_id and model_id in data:
            return str(data[model_id])
@ -166,6 +166,7 @@ class LLMService:
        if prompt_key:
            template = self.get_prompt(prompt_key, model_id=target_model, provider=target_provider)
            try:
                # Formatierung mit den übergebenen Variablen
                current_prompt = template.format(**(variables or {}))
            except Exception as e:
                logger.error(f"❌ Prompt formatting failed for key '{prompt_key}': {e}")