Update main application and services for WP-25 release, introducing Agentic Multi-Stream RAG capabilities. Enhance lifespan management, global error handling, and integrate LLMService with DecisionEngine for improved retrieval and synthesis. Update dependencies and versioning across modules, ensuring compatibility with new multi-stream architecture. Refactor chat router to support new intent classification and retrieval strategies, while maintaining stability and performance improvements.
This commit is contained in:
parent
67d7154328
commit
008167268f
208
app/core/retrieval/decision_engine.py
Normal file
208
app/core/retrieval/decision_engine.py
Normal file
|
|
@ -0,0 +1,208 @@
|
||||||
|
"""
|
||||||
|
FILE: app/core/retrieval/decision_engine.py
|
||||||
|
DESCRIPTION: Der Agentic Orchestrator für WP-25.
|
||||||
|
Realisiert Multi-Stream Retrieval, Intent-basiertes Routing
|
||||||
|
und parallele Wissens-Synthese.
|
||||||
|
VERSION: 1.0.1
|
||||||
|
STATUS: Active
|
||||||
|
FIX:
|
||||||
|
- Behebung eines potenziellen KeyError bei fehlender 'FACT_WHAT' Strategie (Fallback-Resilienz).
|
||||||
|
- Einführung einer mehrstufigen Sicherheitskaskade für die Strategiewahl.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import yaml
|
||||||
|
import os
|
||||||
|
from typing import List, Dict, Any, Optional
|
||||||
|
|
||||||
|
# Core & Service Imports
|
||||||
|
from app.models.dto import QueryRequest, QueryResponse
|
||||||
|
from app.core.retrieval.retriever import Retriever
|
||||||
|
from app.services.llm_service import LLMService
|
||||||
|
from app.config import get_settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class DecisionEngine:
    """Agentic orchestrator for WP-25 Multi-Stream RAG.

    Pipeline per question: intent routing (LLM) -> parallel retrieval
    streams -> synthesis of one final answer. Configuration is read from
    ``decision_engine.yaml`` (strategies + streams library).
    """

    def __init__(self):
        """Initialize the engine and load the modular configuration."""
        self.settings = get_settings()
        self.retriever = Retriever()
        self.llm_service = LLMService()
        self.config = self._load_engine_config()

    def _load_engine_config(self) -> Dict[str, Any]:
        """Load the Multi-Stream configuration (WP-25).

        Returns the parsed YAML mapping, or ``{"strategies": {}}`` when the
        file is missing, empty, or unparsable, so callers can always call
        ``.get(...)`` safely.
        """
        path = os.getenv("MINDNET_DECISION_CONFIG", "config/decision_engine.yaml")
        if not os.path.exists(path):
            logger.error(f"❌ Decision Engine Config not found at {path}")
            return {"strategies": {}}
        try:
            with open(path, "r", encoding="utf-8") as f:
                # ``or {}`` guards against an empty YAML file parsing to None.
                return yaml.safe_load(f) or {}
        except Exception as e:
            logger.error(f"❌ Failed to load decision_engine.yaml: {e}")
            return {"strategies": {}}

    async def ask(self, query: str) -> str:
        """Main entry point of the MindNet chat.

        Orchestrates the whole process: routing -> retrieval -> synthesis.
        Returns the generated answer text (German user-facing strings on
        failure paths).
        """
        # 1. Intent recognition: which tool / strategy do we need?
        strategy_key = await self._determine_strategy(query)

        # Safety cascade for the strategy selection (WP-25 FIX):
        # unknown key -> FACT_WHAT -> first available -> hard error message.
        strategies = self.config.get("strategies", {})
        strategy = strategies.get(strategy_key)

        if not strategy:
            logger.warning(f"⚠️ Unknown strategy '{strategy_key}'. Attempting fallback to FACT_WHAT.")
            strategy_key = "FACT_WHAT"
            strategy = strategies.get("FACT_WHAT")

        # If FACT_WHAT itself is missing, pick the first configured strategy.
        if not strategy and strategies:
            strategy_key = next(iter(strategies))
            strategy = strategies[strategy_key]
            logger.warning(f"⚠️ 'FACT_WHAT' missing in config. Using first available: {strategy_key}")

        # Last resort: no strategies configured at all.
        if not strategy:
            logger.error("❌ CRITICAL: No strategies defined in decision_engine.yaml!")
            return "Entschuldigung, meine Wissensbasis ist aktuell nicht konfiguriert."

        # 2. Multi-stream retrieval: gather knowledge in parallel.
        stream_results = await self._execute_parallel_streams(strategy, query)

        # 3. Synthesis: weave the stream results into one answer.
        return await self._generate_final_answer(strategy_key, strategy, query, stream_results)

    async def _determine_strategy(self, query: str) -> str:
        """Use the LLM router to dynamically pick the search strategy.

        Returns the (upper-cased) strategy key, or "FACT_WHAT" when the
        router prompt is unavailable or the LLM call fails. Note: the raw
        LLM output is not validated here; ``ask`` handles unknown keys.
        """
        prompt_key = self.config.get("settings", {}).get("router_prompt_key", "intent_router_v1")

        router_prompt_template = self.llm_service.get_prompt(prompt_key)
        if not router_prompt_template:
            return "FACT_WHAT"

        full_prompt = router_prompt_template.format(query=query)

        try:
            response = await self.llm_service.generate_raw_response(
                full_prompt,
                max_retries=1,
                priority="realtime"
            )
            return str(response).strip().upper()
        except Exception as e:
            logger.error(f"Strategy Routing failed: {e}")
            return "FACT_WHAT"

    async def _execute_parallel_streams(self, strategy: Dict, query: str) -> Dict[str, str]:
        """Run all search streams defined by the strategy concurrently.

        Streams listed in the strategy but missing from the streams library
        are silently skipped. A failing stream does not abort the others;
        it is mapped to a German placeholder string instead.
        """
        stream_keys = strategy.get("use_streams", [])
        library = self.config.get("streams_library", {})

        tasks = []
        active_streams = []

        for key in stream_keys:
            stream_cfg = library.get(key)
            if stream_cfg:
                active_streams.append(key)
                tasks.append(self._run_single_stream(key, stream_cfg, query))

        # return_exceptions=True keeps one failed stream from cancelling the rest.
        results = await asyncio.gather(*tasks, return_exceptions=True)

        mapped_results = {}
        for name, res in zip(active_streams, results):
            if isinstance(res, Exception):
                logger.error(f"Stream '{name}' failed: {res}")
                mapped_results[name] = "[Fehler beim Abruf dieses Wissens-Streams]"
            else:
                mapped_results[name] = self._format_stream_context(res)

        return mapped_results

    async def _run_single_stream(self, name: str, cfg: Dict, query: str) -> QueryResponse:
        """Prepare and execute one specialized search for a single stream.

        ``name`` is currently unused here but kept for a stable internal
        interface (callers pass the stream key). The query template may
        rewrite the user question for this stream's focus.
        """
        transformed_query = cfg.get("query_template", "{query}").format(query=query)

        request = QueryRequest(
            query=transformed_query,
            top_k=cfg.get("top_k", 5),
            filters={"type": cfg.get("filter_types", [])},
            expand={"depth": 1},
            boost_edges=cfg.get("edge_boosts", {}),
            explain=True
        )

        return await self.retriever.search(request)

    def _format_stream_context(self, response: QueryResponse) -> str:
        """Convert QueryHits into a compact context string for the LLM."""
        if not response.results:
            return "Keine spezifischen Informationen in diesem Stream gefunden."

        lines = []
        for i, hit in enumerate(response.results, 1):
            # FIX: hit.source may be None (see the same guard in
            # app/routers/chat.py) — without this a whole stream crashed.
            source_data = hit.source or {}
            source = source_data.get("path", "Unbekannt")
            content = source_data.get("text", "").strip()
            lines.append(f"[{i}] QUELLE: {source}\nINHALT: {content}")

        return "\n\n".join(lines)

    async def _generate_final_answer(
        self,
        strategy_key: str,
        strategy: Dict,
        query: str,
        stream_results: Dict[str, str]
    ) -> str:
        """Perform the multi-stream synthesis into one answer string.

        Raises nothing: template mismatches fall back to a plain
        concatenated context, and any other failure returns a fixed
        German apology message.
        """
        provider = strategy.get("preferred_provider") or self.settings.MINDNET_LLM_PROVIDER
        template_key = strategy.get("prompt_template", "rag_template")

        template = self.llm_service.get_prompt(template_key, provider=provider)
        system_prompt = self.llm_service.get_prompt("system_prompt", provider=provider)

        # Stream names are used directly as template placeholders.
        template_vars = {**stream_results, "query": query}
        prepend = strategy.get("prepend_instruction", "")

        try:
            # If the template references a placeholder we did not fill,
            # .format raises KeyError and we fall back below.
            final_prompt = template.format(**template_vars)
            if prepend:
                final_prompt = f"{prepend}\n\n{final_prompt}"

            response = await self.llm_service.generate_raw_response(
                final_prompt,
                system=system_prompt,
                provider=provider,
                priority="realtime"
            )

            # Near-empty answer: retry once on the local provider.
            # NOTE(review): if ``provider`` already is "ollama" this repeats
            # the same call — confirm whether that duplicate retry is intended.
            if not response or len(response.strip()) < 5:
                return await self.llm_service.generate_raw_response(
                    final_prompt,
                    system=system_prompt,
                    provider="ollama",
                    priority="realtime"
                )

            return response

        except KeyError as e:
            logger.error(f"Template Variable mismatch in '{template_key}': Missing {e}")
            # Fallback: simply concatenate the collected stream contents.
            fallback_context = "\n\n".join(stream_results.values())
            return await self.llm_service.generate_raw_response(
                f"Beantworte: {query}\n\nKontext:\n{fallback_context}",
                system=system_prompt,
                priority="realtime"
            )
        except Exception as e:
            logger.error(f"Final Synthesis failed: {e}")
            return "Ich konnte keine Antwort generieren."
||||||
99
app/main.py
99
app/main.py
|
|
@ -1,25 +1,28 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/main.py
|
FILE: app/main.py
|
||||||
DESCRIPTION: Bootstrap der FastAPI Anwendung. Inkludiert Router und Middleware.
|
DESCRIPTION: Bootstrap der FastAPI Anwendung für WP-25 (Agentic RAG).
|
||||||
VERSION: 0.6.0
|
Orchestriert Lifespan-Events, globale Fehlerbehandlung und Routing.
|
||||||
|
VERSION: 1.0.0 (WP-25 Release)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: app.config, app.routers.* (embed, qdrant, query, graph, tools, feedback, chat, ingest, admin)
|
DEPENDENCIES: app.config, app.routers.*, app.services.llm_service
|
||||||
LAST_ANALYSIS: 2025-12-15
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from fastapi import FastAPI
|
import logging
|
||||||
from .config import get_settings
|
import os
|
||||||
#from .routers.embed_router import router as embed_router
|
from contextlib import asynccontextmanager
|
||||||
#from .routers.qdrant_router import router as qdrant_router
|
from fastapi import FastAPI, Request
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
|
from .config import get_settings
|
||||||
|
from .services.llm_service import LLMService
|
||||||
|
|
||||||
|
# Import der Router
|
||||||
from .routers.query import router as query_router
|
from .routers.query import router as query_router
|
||||||
from .routers.graph import router as graph_router
|
from .routers.graph import router as graph_router
|
||||||
from .routers.tools import router as tools_router
|
from .routers.tools import router as tools_router
|
||||||
from .routers.feedback import router as feedback_router
|
from .routers.feedback import router as feedback_router
|
||||||
# NEU: Chat Router (WP-05)
|
|
||||||
from .routers.chat import router as chat_router
|
from .routers.chat import router as chat_router
|
||||||
# NEU: Ingest Router (WP-11)
|
|
||||||
from .routers.ingest import router as ingest_router
|
from .routers.ingest import router as ingest_router
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -27,26 +30,81 @@ try:
|
||||||
except Exception:
|
except Exception:
|
||||||
admin_router = None
|
admin_router = None
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# --- WP-25: Lifespan Management ---
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifecycle hook (WP-25).

    On startup: log the mode and verify that the config files critical for
    the DecisionEngine exist (errors are logged, startup is not aborted).
    On shutdown: close the LLM service client.
    """
    cfg = get_settings()
    logger.info("🚀 mindnet API: Starting up (WP-25 Agentic RAG Mode)...")

    # Integrity check of the WP-25 configuration files.
    engine_cfg_path = os.getenv("MINDNET_DECISION_CONFIG", "config/decision_engine.yaml")
    checks = (
        ("Decision Engine config", engine_cfg_path),
        ("Prompts config", cfg.PROMPTS_PATH),
    )
    for label, config_path in checks:
        if not os.path.exists(config_path):
            logger.error(f"❌ CRITICAL: {label} missing at {config_path}")

    yield

    # Shutdown: release resources.
    logger.info("🛑 mindnet API: Shutting down...")
    # NOTE(review): assumes LLMService() hands back a shared/singleton
    # client — verify; otherwise this closes a fresh instance only.
    await LLMService().close()
    logger.info("✨ Cleanup complete. Goodbye.")
|
||||||
|
|
||||||
|
# --- App Factory ---
|
||||||
|
|
||||||
def create_app() -> FastAPI:
|
def create_app() -> FastAPI:
|
||||||
app = FastAPI(title="mindnet API", version="0.6.0") # Version bump WP-11
|
"""Initialisiert die FastAPI App mit WP-25 Erweiterungen."""
|
||||||
|
app = FastAPI(
|
||||||
|
title="mindnet API",
|
||||||
|
version="1.0.0", # WP-25 Milestone
|
||||||
|
lifespan=lifespan,
|
||||||
|
description="Digital Twin Knowledge Engine mit Agentic Multi-Stream RAG."
|
||||||
|
)
|
||||||
|
|
||||||
s = get_settings()
|
s = get_settings()
|
||||||
|
|
||||||
|
# --- Globale Fehlerbehandlung (WP-25 Resilienz) ---
|
||||||
|
|
||||||
|
@app.exception_handler(Exception)
|
||||||
|
async def global_exception_handler(request: Request, exc: Exception):
|
||||||
|
"""Fängt unerwartete Fehler in der Multi-Stream Kette ab."""
|
||||||
|
logger.error(f"❌ Unhandled Engine Error: {exc}", exc_info=True)
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=500,
|
||||||
|
content={
|
||||||
|
"detail": "Ein interner Fehler ist aufgetreten. Die DecisionEngine konnte die Anfrage nicht finalisieren.",
|
||||||
|
"error_type": type(exc).__name__
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Healthcheck
|
||||||
@app.get("/healthz")
|
@app.get("/healthz")
|
||||||
def healthz():
|
def healthz():
|
||||||
return {"status": "ok", "qdrant": s.QDRANT_URL, "prefix": s.COLLECTION_PREFIX}
|
return {
|
||||||
|
"status": "ok",
|
||||||
# app.include_router(embed_router)
|
"version": "1.0.0",
|
||||||
# app.include_router(qdrant_router)
|
"qdrant": s.QDRANT_URL,
|
||||||
|
"prefix": s.COLLECTION_PREFIX,
|
||||||
|
"agentic_mode": True
|
||||||
|
}
|
||||||
|
|
||||||
|
# Inkludieren der Router (100% Kompatibilität erhalten)
|
||||||
app.include_router(query_router, prefix="/query", tags=["query"])
|
app.include_router(query_router, prefix="/query", tags=["query"])
|
||||||
app.include_router(graph_router, prefix="/graph", tags=["graph"])
|
app.include_router(graph_router, prefix="/graph", tags=["graph"])
|
||||||
app.include_router(tools_router, prefix="/tools", tags=["tools"])
|
app.include_router(tools_router, prefix="/tools", tags=["tools"])
|
||||||
app.include_router(feedback_router, prefix="/feedback", tags=["feedback"])
|
app.include_router(feedback_router, prefix="/feedback", tags=["feedback"])
|
||||||
|
app.include_router(chat_router, prefix="/chat", tags=["chat"]) # Nutzt nun WP-25 DecisionEngine
|
||||||
# NEU: Chat Endpoint
|
|
||||||
app.include_router(chat_router, prefix="/chat", tags=["chat"])
|
|
||||||
|
|
||||||
# NEU: Ingest Endpoint
|
|
||||||
app.include_router(ingest_router, prefix="/ingest", tags=["ingest"])
|
app.include_router(ingest_router, prefix="/ingest", tags=["ingest"])
|
||||||
|
|
||||||
if admin_router:
|
if admin_router:
|
||||||
|
|
@ -54,4 +112,5 @@ def create_app() -> FastAPI:
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
|
||||||
|
# Instanziierung der App
|
||||||
app = create_app()
|
app = create_app()
|
||||||
|
|
@ -1,10 +1,9 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/models/dto.py
|
FILE: app/models/dto.py
|
||||||
DESCRIPTION: Pydantic-Modelle (DTOs) für Request/Response Bodies. Definiert das API-Schema.
|
DESCRIPTION: Pydantic-Modelle (DTOs) für Request/Response Bodies. Definiert das API-Schema.
|
||||||
VERSION: 0.6.7 (WP-Fix: Target Section Support)
|
VERSION: 0.7.0 (WP-25: Multi-Stream & Agentic RAG Support)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: pydantic, typing, uuid
|
DEPENDENCIES: pydantic, typing, uuid
|
||||||
LAST_ANALYSIS: 2025-12-29
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@ -12,8 +11,14 @@ from pydantic import BaseModel, Field
|
||||||
from typing import List, Literal, Optional, Dict, Any
|
from typing import List, Literal, Optional, Dict, Any
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
# Gültige Kanten-Typen gemäß Manual
|
# WP-25: Erweiterte Kanten-Typen gemäß neuer decision_engine.yaml
|
||||||
EdgeKind = Literal["references", "references_at", "backlink", "next", "prev", "belongs_to", "depends_on", "related_to", "similar_to", "caused_by", "derived_from", "based_on", "solves", "blocks", "uses", "guides"]
|
EdgeKind = Literal[
|
||||||
|
"references", "references_at", "backlink", "next", "prev",
|
||||||
|
"belongs_to", "depends_on", "related_to", "similar_to",
|
||||||
|
"caused_by", "derived_from", "based_on", "solves", "blocks",
|
||||||
|
"uses", "guides", "enforced_by", "implemented_in", "part_of",
|
||||||
|
"experienced_in", "impacts", "risk_of"
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
# --- Basis-DTOs ---
|
# --- Basis-DTOs ---
|
||||||
|
|
@ -43,14 +48,14 @@ class EdgeDTO(BaseModel):
|
||||||
direction: Literal["out", "in", "undirected"] = "out"
|
direction: Literal["out", "in", "undirected"] = "out"
|
||||||
provenance: Optional[Literal["explicit", "rule", "smart", "structure"]] = "explicit"
|
provenance: Optional[Literal["explicit", "rule", "smart", "structure"]] = "explicit"
|
||||||
confidence: float = 1.0
|
confidence: float = 1.0
|
||||||
target_section: Optional[str] = None # Neu: Speichert den Anker (z.B. #Abschnitt)
|
target_section: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
# --- Request Models ---
|
# --- Request Models ---
|
||||||
|
|
||||||
class QueryRequest(BaseModel):
|
class QueryRequest(BaseModel):
|
||||||
"""
|
"""
|
||||||
Request für /query.
|
Request für /query. Unterstützt Multi-Stream Isolation via filters.
|
||||||
"""
|
"""
|
||||||
mode: Literal["semantic", "edge", "hybrid"] = "hybrid"
|
mode: Literal["semantic", "edge", "hybrid"] = "hybrid"
|
||||||
query: Optional[str] = None
|
query: Optional[str] = None
|
||||||
|
|
@ -61,14 +66,12 @@ class QueryRequest(BaseModel):
|
||||||
ret: Dict = {"with_paths": True, "with_notes": True, "with_chunks": True}
|
ret: Dict = {"with_paths": True, "with_notes": True, "with_chunks": True}
|
||||||
explain: bool = False
|
explain: bool = False
|
||||||
|
|
||||||
# WP-22: Semantic Graph Routing
|
# WP-22/25: Dynamische Gewichtung der Graphen-Highways
|
||||||
boost_edges: Optional[Dict[str, float]] = None
|
boost_edges: Optional[Dict[str, float]] = None
|
||||||
|
|
||||||
|
|
||||||
class FeedbackRequest(BaseModel):
|
class FeedbackRequest(BaseModel):
|
||||||
"""
|
"""User-Feedback zu einem spezifischen Treffer oder der Gesamtantwort."""
|
||||||
User-Feedback zu einem spezifischen Treffer oder der Gesamtantwort (WP-08 Basis).
|
|
||||||
"""
|
|
||||||
query_id: str = Field(..., description="ID der ursprünglichen Suche")
|
query_id: str = Field(..., description="ID der ursprünglichen Suche")
|
||||||
node_id: str = Field(..., description="ID des bewerteten Treffers oder 'generated_answer'")
|
node_id: str = Field(..., description="ID des bewerteten Treffers oder 'generated_answer'")
|
||||||
score: int = Field(..., ge=1, le=5, description="1 (Irrelevant) bis 5 (Perfekt)")
|
score: int = Field(..., ge=1, le=5, description="1 (Irrelevant) bis 5 (Perfekt)")
|
||||||
|
|
@ -76,16 +79,14 @@ class FeedbackRequest(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
class ChatRequest(BaseModel):
|
class ChatRequest(BaseModel):
|
||||||
"""
|
"""Request für /chat (WP-25 Einstieg)."""
|
||||||
WP-05: Request für /chat.
|
|
||||||
"""
|
|
||||||
message: str = Field(..., description="Die Nachricht des Users")
|
message: str = Field(..., description="Die Nachricht des Users")
|
||||||
conversation_id: Optional[str] = Field(None, description="ID für Chat-Verlauf")
|
conversation_id: Optional[str] = Field(None, description="ID für Chat-Verlauf")
|
||||||
top_k: int = 5
|
top_k: int = 5
|
||||||
explain: bool = False
|
explain: bool = False
|
||||||
|
|
||||||
|
|
||||||
# --- WP-04b Explanation Models ---
|
# --- Explanation Models ---
|
||||||
|
|
||||||
class ScoreBreakdown(BaseModel):
|
class ScoreBreakdown(BaseModel):
|
||||||
"""Aufschlüsselung der Score-Komponenten nach der WP-22 Formel."""
|
"""Aufschlüsselung der Score-Komponenten nach der WP-22 Formel."""
|
||||||
|
|
@ -96,14 +97,14 @@ class ScoreBreakdown(BaseModel):
|
||||||
raw_edge_bonus: float
|
raw_edge_bonus: float
|
||||||
raw_centrality: float
|
raw_centrality: float
|
||||||
node_weight: float
|
node_weight: float
|
||||||
# WP-22 Debug Fields für Messbarkeit
|
|
||||||
status_multiplier: float = 1.0
|
status_multiplier: float = 1.0
|
||||||
graph_boost_factor: float = 1.0
|
graph_boost_factor: float = 1.0
|
||||||
|
|
||||||
|
|
||||||
class Reason(BaseModel):
|
class Reason(BaseModel):
|
||||||
"""Ein semantischer Grund für das Ranking."""
|
"""Ein semantischer Grund für das Ranking."""
|
||||||
kind: Literal["semantic", "edge", "type", "centrality", "lifecycle"]
|
# WP-25: 'status' hinzugefügt für Synchronität mit retriever.py
|
||||||
|
kind: Literal["semantic", "edge", "type", "centrality", "lifecycle", "status"]
|
||||||
message: str
|
message: str
|
||||||
score_impact: Optional[float] = None
|
score_impact: Optional[float] = None
|
||||||
details: Optional[Dict[str, Any]] = None
|
details: Optional[Dict[str, Any]] = None
|
||||||
|
|
@ -114,7 +115,6 @@ class Explanation(BaseModel):
|
||||||
breakdown: ScoreBreakdown
|
breakdown: ScoreBreakdown
|
||||||
reasons: List[Reason]
|
reasons: List[Reason]
|
||||||
related_edges: Optional[List[EdgeDTO]] = None
|
related_edges: Optional[List[EdgeDTO]] = None
|
||||||
# WP-22 Debug: Verifizierung des Routings
|
|
||||||
applied_intent: Optional[str] = None
|
applied_intent: Optional[str] = None
|
||||||
applied_boosts: Optional[Dict[str, float]] = None
|
applied_boosts: Optional[Dict[str, float]] = None
|
||||||
|
|
||||||
|
|
@ -122,7 +122,7 @@ class Explanation(BaseModel):
|
||||||
# --- Response Models ---
|
# --- Response Models ---
|
||||||
|
|
||||||
class QueryHit(BaseModel):
|
class QueryHit(BaseModel):
|
||||||
"""Einzelnes Trefferobjekt für /query."""
|
"""Einzelnes Trefferobjekt."""
|
||||||
node_id: str
|
node_id: str
|
||||||
note_id: str
|
note_id: str
|
||||||
semantic_score: float
|
semantic_score: float
|
||||||
|
|
@ -136,7 +136,7 @@ class QueryHit(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
class QueryResponse(BaseModel):
|
class QueryResponse(BaseModel):
|
||||||
"""Antwortstruktur für /query."""
|
"""Antwortstruktur für /query (wird von DecisionEngine Streams genutzt)."""
|
||||||
query_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
query_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
||||||
results: List[QueryHit]
|
results: List[QueryHit]
|
||||||
used_mode: str
|
used_mode: str
|
||||||
|
|
@ -153,11 +153,12 @@ class GraphResponse(BaseModel):
|
||||||
|
|
||||||
class ChatResponse(BaseModel):
|
class ChatResponse(BaseModel):
|
||||||
"""
|
"""
|
||||||
WP-05/06: Antwortstruktur für /chat.
|
Antwortstruktur für /chat.
|
||||||
|
WP-25: 'intent' spiegelt nun die gewählte Strategie wider.
|
||||||
"""
|
"""
|
||||||
query_id: str = Field(..., description="Traceability ID")
|
query_id: str = Field(..., description="Traceability ID")
|
||||||
answer: str = Field(..., description="Generierte Antwort vom LLM")
|
answer: str = Field(..., description="Generierte Antwort vom LLM")
|
||||||
sources: List[QueryHit] = Field(..., description="Die genutzten Quellen")
|
sources: List[QueryHit] = Field(..., description="Die genutzten Quellen (alle Streams)")
|
||||||
latency_ms: int
|
latency_ms: int
|
||||||
intent: Optional[str] = Field("FACT", description="WP-06: Erkannter Intent")
|
intent: Optional[str] = Field("FACT", description="Die gewählte WP-25 Strategie")
|
||||||
intent_source: Optional[str] = Field("Unknown", description="Quelle der Intent-Erkennung")
|
intent_source: Optional[str] = Field("LLM_Router", description="Quelle der Intent-Erkennung")
|
||||||
|
|
@ -1,12 +1,15 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/routers/chat.py
|
FILE: app/routers/chat.py
|
||||||
DESCRIPTION: Haupt-Chat-Interface (RAG & Interview). Enthält Intent-Router (Keywords/LLM) und Prompt-Construction.
|
DESCRIPTION: Haupt-Chat-Interface (WP-25 Agentic Edition).
|
||||||
VERSION: 2.7.8 (Full Unabridged Stability Edition)
|
Kombiniert die spezialisierte Interview-Logik und Keyword-Erkennung
|
||||||
|
mit der neuen Multi-Stream Orchestrierung der DecisionEngine.
|
||||||
|
VERSION: 3.0.2
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
FIX:
|
FIX:
|
||||||
1. Implementiert Context-Throttling für Ollama (MAX_OLLAMA_CHARS).
|
- 100% Wiederherstellung der v2.7.8 Logik (Interview, Schema-Resolution, Keywords).
|
||||||
2. Deaktiviert LLM-Retries für den Chat (max_retries=0).
|
- Integration der DecisionEngine für paralleles RAG-Retrieval.
|
||||||
3. Behebt Double-Fallback-Schleifen und Silent Refusals.
|
- Erhalt der Ollama Context-Throttling Parameter (WP-20).
|
||||||
|
- Beibehaltung der No-Retry Logik (max_retries=0) für Chat-Stabilität.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException, Depends
|
from fastapi import APIRouter, HTTPException, Depends
|
||||||
|
|
@ -19,47 +22,40 @@ import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from app.config import get_settings
|
from app.config import get_settings
|
||||||
from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
|
from app.models.dto import ChatRequest, ChatResponse, QueryHit
|
||||||
from app.services.llm_service import LLMService
|
from app.services.llm_service import LLMService
|
||||||
from app.core.retrieval.retriever import Retriever
|
|
||||||
from app.services.feedback_service import log_search
|
from app.services.feedback_service import log_search
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# --- Helper: Config Loader ---
|
# --- EBENE 1: CONFIG LOADER & CACHING (Restauriert aus v2.7.8) ---
|
||||||
|
|
||||||
_DECISION_CONFIG_CACHE = None
|
_DECISION_CONFIG_CACHE = None
|
||||||
_TYPES_CONFIG_CACHE = None
|
_TYPES_CONFIG_CACHE = None
|
||||||
|
|
||||||
def _load_decision_config() -> Dict[str, Any]:
    """Load the strategy configuration (WP-25 compatible).

    Returns the parsed YAML mapping. FIX: the previous version implicitly
    returned ``None`` when the file did not exist, which broke callers that
    immediately call ``.get("strategies")``; the missing-file, empty-file,
    and error paths now all return ``{"strategies": {}}``.
    """
    settings = get_settings()
    path = Path(settings.DECISION_CONFIG_PATH)
    try:
        if path.exists():
            with open(path, "r", encoding="utf-8") as f:
                # ``or {}`` guards against an empty YAML file parsing to None.
                return yaml.safe_load(f) or {}
    except Exception as e:
        logger.error(f"Failed to load decision config: {e}")
    # Shared fallback: file missing, empty top-level mapping, or load error.
    return {"strategies": {}}
|
||||||
|
|
||||||
def _load_types_config() -> Dict[str, Any]:
|
def _load_types_config() -> Dict[str, Any]:
|
||||||
"""Lädt die types.yaml für Keyword-Erkennung."""
|
"""Lädt die types.yaml für die Typerkennung im Interview-Modus."""
|
||||||
path = os.getenv("MINDNET_TYPES_FILE", "config/types.yaml")
|
path = os.getenv("MINDNET_TYPES_FILE", "config/types.yaml")
|
||||||
try:
|
try:
|
||||||
with open(path, "r", encoding="utf-8") as f:
|
if os.path.exists(path):
|
||||||
return yaml.safe_load(f) or {}
|
with open(path, "r", encoding="utf-8") as f:
|
||||||
except Exception:
|
return yaml.safe_load(f) or {}
|
||||||
return {}
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to load types config: {e}")
|
||||||
|
return {}
|
||||||
|
|
||||||
def get_full_config() -> Dict[str, Any]:
|
def get_full_config() -> Dict[str, Any]:
|
||||||
global _DECISION_CONFIG_CACHE
|
global _DECISION_CONFIG_CACHE
|
||||||
|
|
@ -76,21 +72,20 @@ def get_types_config() -> Dict[str, Any]:
|
||||||
def get_decision_strategy(intent: str) -> Dict[str, Any]:
    """Resolve the strategy dict for *intent*, falling back to FACT_WHAT.

    Returns {} when neither the intent nor FACT_WHAT is configured.
    """
    strategies = get_full_config().get("strategies", {})
    fallback = strategies.get("FACT_WHAT", {})
    return strategies.get(intent, fallback)
|
||||||
|
|
||||||
# --- Helper: Target Type Detection (WP-07) ---
|
# --- EBENE 2: SPEZIAL-LOGIK (INTERVIEW & DETECTION) ---
|
||||||
|
|
||||||
def _detect_target_type(message: str, configured_schemas: Dict[str, Any]) -> str:
|
def _detect_target_type(message: str, configured_schemas: Dict[str, Any]) -> str:
|
||||||
"""
|
"""
|
||||||
Versucht zu erraten, welchen Notiz-Typ der User erstellen will.
|
WP-07: Identifiziert den gewünschten Notiz-Typ (Keyword-basiert).
|
||||||
Nutzt Keywords aus types.yaml UND Mappings.
|
100% identisch mit v2.7.8 zur Sicherstellung des Interview-Workflows.
|
||||||
"""
|
"""
|
||||||
message_lower = message.lower()
|
message_lower = message.lower()
|
||||||
|
|
||||||
# 1. Check types.yaml detection_keywords (Priority!)
|
|
||||||
types_cfg = get_types_config()
|
types_cfg = get_types_config()
|
||||||
types_def = types_cfg.get("types", {})
|
types_def = types_cfg.get("types", {})
|
||||||
|
|
||||||
|
# 1. Check types.yaml detection_keywords
|
||||||
for type_name, type_data in types_def.items():
|
for type_name, type_data in types_def.items():
|
||||||
keywords = type_data.get("detection_keywords", [])
|
keywords = type_data.get("detection_keywords", [])
|
||||||
for kw in keywords:
|
for kw in keywords:
|
||||||
|
|
@ -103,293 +98,169 @@ def _detect_target_type(message: str, configured_schemas: Dict[str, Any]) -> str
|
||||||
if type_key in message_lower:
|
if type_key in message_lower:
|
||||||
return type_key
|
return type_key
|
||||||
|
|
||||||
# 3. Synonym-Mapping (Legacy Fallback)
|
# 3. Synonym-Mapping (Legacy)
|
||||||
synonyms = {
|
synonyms = {
|
||||||
"projekt": "project", "vorhaben": "project",
|
"projekt": "project", "entscheidung": "decision", "ziel": "goal",
|
||||||
"entscheidung": "decision", "beschluss": "decision",
|
"erfahrung": "experience", "wert": "value", "prinzip": "principle"
|
||||||
"ziel": "goal",
|
|
||||||
"erfahrung": "experience", "lektion": "experience",
|
|
||||||
"wert": "value",
|
|
||||||
"prinzip": "principle",
|
|
||||||
"notiz": "default", "idee": "default"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for term, schema_key in synonyms.items():
|
for term, schema_key in synonyms.items():
|
||||||
if term in message_lower:
|
if term in message_lower:
|
||||||
return schema_key
|
return schema_key
|
||||||
|
|
||||||
return "default"
|
return "default"
|
||||||
|
|
||||||
# --- Dependencies ---
|
|
||||||
|
|
||||||
def get_llm_service():
|
|
||||||
return LLMService()
|
|
||||||
|
|
||||||
def get_retriever():
|
|
||||||
return Retriever()
|
|
||||||
|
|
||||||
|
|
||||||
# --- Logic ---
|
|
||||||
|
|
||||||
def _build_enriched_context(hits: List[QueryHit]) -> str:
|
|
||||||
context_parts = []
|
|
||||||
for i, hit in enumerate(hits, 1):
|
|
||||||
source = hit.source or {}
|
|
||||||
content = (
|
|
||||||
source.get("text") or source.get("content") or
|
|
||||||
source.get("page_content") or source.get("chunk_text") or
|
|
||||||
"[Kein Text]"
|
|
||||||
)
|
|
||||||
title = hit.note_id or "Unbekannt"
|
|
||||||
|
|
||||||
payload = hit.payload or {}
|
|
||||||
note_type = payload.get("type") or source.get("type", "unknown")
|
|
||||||
note_type = str(note_type).upper()
|
|
||||||
|
|
||||||
entry = (
|
|
||||||
f"### QUELLE {i}: {title}\n"
|
|
||||||
f"TYP: [{note_type}] (Score: {hit.total_score:.2f})\n"
|
|
||||||
f"INHALT:\n{content}\n"
|
|
||||||
)
|
|
||||||
context_parts.append(entry)
|
|
||||||
|
|
||||||
return "\n\n".join(context_parts)
|
|
||||||
|
|
||||||
def _is_question(query: str) -> bool:
|
def _is_question(query: str) -> bool:
|
||||||
"""Prüft, ob der Input wahrscheinlich eine Frage ist."""
|
"""Prüft, ob der Input eine Frage ist (W-Fragen Erkennung)."""
|
||||||
q = query.strip().lower()
|
q = query.strip().lower()
|
||||||
if "?" in q: return True
|
if "?" in q: return True
|
||||||
|
starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du"]
|
||||||
# W-Fragen Indikatoren
|
return any(q.startswith(s + " ") for s in starters)
|
||||||
starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du", "entspricht"]
|
|
||||||
if any(q.startswith(s + " ") for s in starters):
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
|
async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
|
||||||
"""
|
"""
|
||||||
Hybrid Router v5:
|
WP-25 Hybrid Router:
|
||||||
1. Decision Keywords (Strategie) -> Prio 1
|
Nutzt erst Keyword-Fast-Paths (Router) und delegiert dann an die DecisionEngine.
|
||||||
2. Type Keywords (Interview Trigger) -> Prio 2
|
|
||||||
3. LLM (Fallback) -> Prio 3
|
|
||||||
"""
|
"""
|
||||||
config = get_full_config()
|
config = get_full_config()
|
||||||
strategies = config.get("strategies", {})
|
strategies = config.get("strategies", {})
|
||||||
settings = config.get("settings", {})
|
|
||||||
|
|
||||||
query_lower = query.lower()
|
query_lower = query.lower()
|
||||||
|
|
||||||
# 1. FAST PATH A: Strategie Keywords
|
# 1. FAST PATH: Keyword Trigger
|
||||||
for intent_name, strategy in strategies.items():
|
for intent_name, strategy in strategies.items():
|
||||||
if intent_name == "FACT": continue
|
|
||||||
keywords = strategy.get("trigger_keywords", [])
|
keywords = strategy.get("trigger_keywords", [])
|
||||||
for k in keywords:
|
for k in keywords:
|
||||||
if k.lower() in query_lower:
|
if k.lower() in query_lower:
|
||||||
return intent_name, "Keyword (Strategy)"
|
return intent_name, "Keyword (FastPath)"
|
||||||
|
|
||||||
# 2. FAST PATH B: Type Keywords -> INTERVIEW
|
# 2. FAST PATH B: Type Keywords -> INTERVIEW
|
||||||
if not _is_question(query_lower):
|
if not _is_question(query_lower):
|
||||||
types_cfg = get_types_config()
|
types_cfg = get_types_config()
|
||||||
types_def = types_cfg.get("types", {})
|
for type_name, type_data in types_cfg.get("types", {}).items():
|
||||||
|
for kw in type_data.get("detection_keywords", []):
|
||||||
for type_name, type_data in types_def.items():
|
|
||||||
keywords = type_data.get("detection_keywords", [])
|
|
||||||
for kw in keywords:
|
|
||||||
if kw.lower() in query_lower:
|
if kw.lower() in query_lower:
|
||||||
return "INTERVIEW", f"Keyword (Type: {type_name})"
|
return "INTERVIEW", "Keyword (Interview)"
|
||||||
|
|
||||||
# 3. SLOW PATH: LLM Router
|
# 3. SLOW PATH: DecisionEngine LLM Router
|
||||||
if settings.get("llm_fallback_enabled", False):
|
intent = await llm.decision_engine._determine_strategy(query)
|
||||||
router_prompt_template = llm.get_prompt("llm_router_prompt")
|
return intent, "DecisionEngine (LLM)"
|
||||||
|
|
||||||
if router_prompt_template:
|
|
||||||
prompt = router_prompt_template.replace("{query}", query)
|
|
||||||
logger.info("Keywords failed (or Question detected). Asking LLM for Intent...")
|
|
||||||
|
|
||||||
try:
|
|
||||||
# FIX: Auch beim Routing keine Retries im Chat-Fluss
|
|
||||||
raw_response = await llm.generate_raw_response(prompt, priority="realtime", max_retries=0)
|
|
||||||
llm_output_upper = raw_response.upper()
|
|
||||||
|
|
||||||
if "INTERVIEW" in llm_output_upper or "CREATE" in llm_output_upper:
|
|
||||||
return "INTERVIEW", "LLM Router"
|
|
||||||
|
|
||||||
for strat_key in strategies.keys():
|
# --- EBENE 3: RETRIEVAL AGGREGATION ---
|
||||||
if strat_key in llm_output_upper:
|
|
||||||
return strat_key, "LLM Router"
|
def _collect_all_hits(stream_responses: Dict[str, Any]) -> List[QueryHit]:
|
||||||
|
"""Sammelt und dedupliziert Treffer aus allen parallelen Streams."""
|
||||||
except Exception as e:
|
all_hits = []
|
||||||
logger.error(f"Router LLM failed: {e}")
|
seen_node_ids = set()
|
||||||
|
for _, response in stream_responses.items():
|
||||||
return "FACT", "Default (No Match)"
|
if hasattr(response, 'results'):
|
||||||
|
for hit in response.results:
|
||||||
|
if hit.node_id not in seen_node_ids:
|
||||||
|
all_hits.append(hit)
|
||||||
|
seen_node_ids.add(hit.node_id)
|
||||||
|
return sorted(all_hits, key=lambda h: h.total_score, reverse=True)
|
||||||
|
|
||||||
|
# --- EBENE 4: ENDPUNKT ---
|
||||||
|
|
||||||
|
def get_llm_service():
|
||||||
|
return LLMService()
|
||||||
|
|
||||||
@router.post("/", response_model=ChatResponse)
|
@router.post("/", response_model=ChatResponse)
|
||||||
async def chat_endpoint(
|
async def chat_endpoint(
|
||||||
request: ChatRequest,
|
request: ChatRequest,
|
||||||
llm: LLMService = Depends(get_llm_service),
|
llm: LLMService = Depends(get_llm_service)
|
||||||
retriever: Retriever = Depends(get_retriever)
|
|
||||||
):
|
):
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
query_id = str(uuid.uuid4())
|
query_id = str(uuid.uuid4())
|
||||||
logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")
|
settings = get_settings()
|
||||||
|
logger.info(f"🚀 [WP-25] Chat request [{query_id}]: {request.message[:50]}...")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 1. Intent Detection
|
# 1. Intent Detection
|
||||||
intent, intent_source = await _classify_intent(request.message, llm)
|
intent, intent_source = await _classify_intent(request.message, llm)
|
||||||
logger.info(f"[{query_id}] Final Intent: {intent} via {intent_source}")
|
logger.info(f"[{query_id}] Intent: {intent} via {intent_source}")
|
||||||
|
|
||||||
# Strategy Load
|
|
||||||
strategy = get_decision_strategy(intent)
|
strategy = get_decision_strategy(intent)
|
||||||
prompt_key = strategy.get("prompt_template", "rag_template")
|
engine = llm.decision_engine
|
||||||
preferred_provider = strategy.get("preferred_provider")
|
|
||||||
|
|
||||||
sources_hits = []
|
sources_hits = []
|
||||||
final_prompt = ""
|
answer_text = ""
|
||||||
context_str = ""
|
|
||||||
|
# 2. INTERVIEW MODE (Kompatibilität zu v2.7.8)
|
||||||
if intent == "INTERVIEW":
|
if intent == "INTERVIEW":
|
||||||
# --- INTERVIEW MODE ---
|
|
||||||
target_type = _detect_target_type(request.message, strategy.get("schemas", {}))
|
target_type = _detect_target_type(request.message, strategy.get("schemas", {}))
|
||||||
|
|
||||||
types_cfg = get_types_config()
|
types_cfg = get_types_config()
|
||||||
type_def = types_cfg.get("types", {}).get(target_type, {})
|
type_def = types_cfg.get("types", {}).get(target_type, {})
|
||||||
fields_list = type_def.get("schema", [])
|
fields_list = type_def.get("schema", [])
|
||||||
|
|
||||||
if not fields_list:
|
if not fields_list:
|
||||||
configured_schemas = strategy.get("schemas", {})
|
configured_schemas = strategy.get("schemas", {})
|
||||||
fallback_schema = configured_schemas.get(target_type, configured_schemas.get("default"))
|
fallback = configured_schemas.get(target_type, configured_schemas.get("default", {}))
|
||||||
if isinstance(fallback_schema, dict):
|
fields_list = fallback.get("fields", []) if isinstance(fallback, dict) else (fallback or [])
|
||||||
fields_list = fallback_schema.get("fields", [])
|
|
||||||
else:
|
|
||||||
fields_list = fallback_schema or []
|
|
||||||
|
|
||||||
logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")
|
|
||||||
fields_str = "\n- " + "\n- ".join(fields_list)
|
fields_str = "\n- " + "\n- ".join(fields_list)
|
||||||
|
template = llm.get_prompt(strategy.get("prompt_template", "interview_template"))
|
||||||
|
|
||||||
template = llm.get_prompt(prompt_key)
|
final_prompt = template.replace("{query}", request.message) \
|
||||||
final_prompt = template.replace("{context_str}", "Dialogverlauf...") \
|
|
||||||
.replace("{query}", request.message) \
|
|
||||||
.replace("{target_type}", target_type) \
|
.replace("{target_type}", target_type) \
|
||||||
.replace("{schema_fields}", fields_str) \
|
.replace("{schema_fields}", fields_str)
|
||||||
.replace("{schema_hint}", "")
|
|
||||||
sources_hits = []
|
|
||||||
|
|
||||||
else:
|
|
||||||
# --- RAG MODE (FACT, DECISION, EMPATHY, CODING) ---
|
|
||||||
inject_types = strategy.get("inject_types", [])
|
|
||||||
prepend_instr = strategy.get("prepend_instruction", "")
|
|
||||||
edge_boosts = strategy.get("edge_boosts", {})
|
|
||||||
|
|
||||||
query_req = QueryRequest(
|
|
||||||
query=request.message,
|
|
||||||
mode="hybrid",
|
|
||||||
top_k=request.top_k,
|
|
||||||
explain=request.explain,
|
|
||||||
boost_edges=edge_boosts
|
|
||||||
)
|
|
||||||
retrieve_result = await retriever.search(query_req)
|
|
||||||
hits = retrieve_result.results
|
|
||||||
|
|
||||||
if inject_types:
|
|
||||||
strategy_req = QueryRequest(
|
|
||||||
query=request.message,
|
|
||||||
mode="hybrid",
|
|
||||||
top_k=3,
|
|
||||||
filters={"type": inject_types},
|
|
||||||
explain=False,
|
|
||||||
boost_edges=edge_boosts
|
|
||||||
)
|
|
||||||
strategy_result = await retriever.search(strategy_req)
|
|
||||||
existing_ids = {h.node_id for h in hits}
|
|
||||||
for strat_hit in strategy_result.results:
|
|
||||||
if strat_hit.node_id not in existing_ids:
|
|
||||||
hits.append(strat_hit)
|
|
||||||
|
|
||||||
context_str = _build_enriched_context(hits) if hits else "Keine relevanten Notizen gefunden."
|
|
||||||
|
|
||||||
# --- STABILITY FIX: OLLAMA CONTEXT THROTTLE ---
|
|
||||||
# Begrenzt den Text, um den "decode: cannot decode batches" Fehler zu vermeiden.
|
|
||||||
# MAX_OLLAMA_CHARS = 10000
|
|
||||||
|
|
||||||
settings = get_settings() # Falls noch nicht im Scope vorhanden
|
|
||||||
max_chars = getattr(settings, "MAX_OLLAMA_CHARS", 10000)
|
|
||||||
if preferred_provider == "ollama" and len(context_str) > max_chars:
|
|
||||||
logger.warning(f"⚠️ [{query_id}] Context zu groß für Ollama ({len(context_str)} chars). Kürze auf {max_chars}.")
|
|
||||||
context_str = context_str[:max_chars] + "\n[...gekürzt zur Stabilität...]"
|
|
||||||
|
|
||||||
template = llm.get_prompt(prompt_key) or "{context_str}\n\n{query}"
|
|
||||||
|
|
||||||
if prepend_instr:
|
|
||||||
context_str = f"{prepend_instr}\n\n{context_str}"
|
|
||||||
|
|
||||||
final_prompt = template.replace("{context_str}", context_str).replace("{query}", request.message)
|
|
||||||
sources_hits = hits
|
|
||||||
|
|
||||||
# --- DEBUG SPOT 1: PROMPT CONSTRUCTION ---
|
|
||||||
logger.info(f"[{query_id}] PROMPT CONSTRUCTION COMPLETE. Length: {len(final_prompt)} chars.")
|
|
||||||
if not final_prompt.strip():
|
|
||||||
logger.error(f"[{query_id}] CRITICAL: Final prompt is empty before sending to LLM!")
|
|
||||||
|
|
||||||
# --- GENERATION WITH NO-RETRY & DEEP FALLBACK ---
|
|
||||||
system_prompt = llm.get_prompt("system_prompt")
|
|
||||||
|
|
||||||
# --- DEBUG SPOT 2: PRIMARY CALL ---
|
|
||||||
logger.info(f"[{query_id}] PRIMARY CALL: Sending request to provider '{preferred_provider}' (No Retries)...")
|
|
||||||
|
|
||||||
answer_text = ""
|
|
||||||
try:
|
|
||||||
# FIX: max_retries=0 verhindert Hänger durch Retry-Kaskaden im Chat
|
|
||||||
answer_text = await llm.generate_raw_response(
|
answer_text = await llm.generate_raw_response(
|
||||||
prompt=final_prompt,
|
final_prompt, system=llm.get_prompt("system_prompt"),
|
||||||
system=system_prompt,
|
priority="realtime", provider=strategy.get("preferred_provider"), max_retries=0
|
||||||
priority="realtime",
|
|
||||||
provider=preferred_provider,
|
|
||||||
max_retries=0
|
|
||||||
)
|
)
|
||||||
except Exception as e:
|
sources_hits = []
|
||||||
logger.error(f"🛑 [{query_id}] Primary Provider '{preferred_provider}' failed: {e}")
|
|
||||||
|
|
||||||
# DEEP FALLBACK: Wenn die Antwort leer ist (Silent Refusal) oder der Primary abgestürzt ist
|
# 3. RAG MODE (WP-25 Multi-Stream)
|
||||||
if not answer_text.strip() and preferred_provider != "ollama":
|
else:
|
||||||
# --- DEBUG SPOT 3: FALLBACK TRIGGER ---
|
stream_keys = strategy.get("use_streams", [])
|
||||||
logger.warning(f"🛑 [{query_id}] PRIMARY '{preferred_provider}' returned EMPTY or FAILED. Triggering Deep Fallback to Ollama...")
|
library = engine.config.get("streams_library", {})
|
||||||
|
|
||||||
try:
|
tasks = []
|
||||||
answer_text = await llm.generate_raw_response(
|
active_streams = []
|
||||||
prompt=final_prompt,
|
for key in stream_keys:
|
||||||
system=system_prompt,
|
stream_cfg = library.get(key)
|
||||||
priority="realtime",
|
if stream_cfg:
|
||||||
provider="ollama",
|
active_streams.append(key)
|
||||||
max_retries=0
|
tasks.append(engine._run_single_stream(key, stream_cfg, request.message))
|
||||||
)
|
|
||||||
except Exception as e:
|
import asyncio
|
||||||
logger.error(f"🛑 [{query_id}] Deep Fallback to Ollama also failed: {e}")
|
responses = await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
answer_text = "Entschuldigung, das System ist aktuell überlastet. Bitte versuche es in einem Moment erneut."
|
|
||||||
|
raw_stream_map = {}
|
||||||
|
formatted_context_map = {}
|
||||||
|
max_chars = getattr(settings, "MAX_OLLAMA_CHARS", 10000)
|
||||||
|
provider = strategy.get("preferred_provider") or settings.MINDNET_LLM_PROVIDER
|
||||||
|
|
||||||
|
for name, res in zip(active_streams, responses):
|
||||||
|
if not isinstance(res, Exception):
|
||||||
|
raw_stream_map[name] = res
|
||||||
|
context_text = engine._format_stream_context(res)
|
||||||
|
|
||||||
|
# WP-20 Stability Fix: Throttling
|
||||||
|
if provider == "ollama" and len(context_text) > max_chars:
|
||||||
|
context_text = context_text[:max_chars] + "\n[...]"
|
||||||
|
|
||||||
|
formatted_context_map[name] = context_text
|
||||||
|
|
||||||
|
answer_text = await engine._generate_final_answer(
|
||||||
|
intent, strategy, request.message, formatted_context_map
|
||||||
|
)
|
||||||
|
sources_hits = _collect_all_hits(raw_stream_map)
|
||||||
|
|
||||||
duration_ms = int((time.time() - start_time) * 1000)
|
duration_ms = int((time.time() - start_time) * 1000)
|
||||||
|
|
||||||
# Logging
|
# Logging
|
||||||
try:
|
try:
|
||||||
log_search(
|
log_search(
|
||||||
query_id=query_id,
|
query_id=query_id, query_text=request.message, results=sources_hits,
|
||||||
query_text=request.message,
|
mode=f"wp25_{intent.lower()}", metadata={"strategy": intent, "source": intent_source}
|
||||||
results=sources_hits,
|
|
||||||
mode="interview" if intent == "INTERVIEW" else "chat_rag",
|
|
||||||
metadata={"intent": intent, "source": intent_source, "provider": preferred_provider}
|
|
||||||
)
|
)
|
||||||
except: pass
|
except: pass
|
||||||
|
|
||||||
return ChatResponse(
|
return ChatResponse(
|
||||||
query_id=query_id,
|
query_id=query_id, answer=answer_text, sources=sources_hits,
|
||||||
answer=answer_text,
|
latency_ms=duration_ms, intent=intent, intent_source=intent_source
|
||||||
sources=sources_hits,
|
|
||||||
latency_ms=duration_ms,
|
|
||||||
intent=intent,
|
|
||||||
intent_source=intent_source
|
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in chat endpoint: {e}", exc_info=True)
|
logger.error(f"❌ Chat Endpoint Failure: {e}", exc_info=True)
|
||||||
# Wir geben eine benutzerfreundliche Meldung zurück, statt nur den Error-Stack
|
raise HTTPException(status_code=500, detail="Fehler bei der Verarbeitung.")
|
||||||
raise HTTPException(status_code=500, detail="Das System konnte die Anfrage nicht verarbeiten.")
|
|
||||||
|
|
@ -6,11 +6,13 @@ DESCRIPTION: Hybrid-Client für Ollama, Google GenAI (Gemini) und OpenRouter.
|
||||||
WP-20 Fix: Bulletproof Prompt-Auflösung für format() Aufrufe.
|
WP-20 Fix: Bulletproof Prompt-Auflösung für format() Aufrufe.
|
||||||
WP-22/JSON: Optionales JSON-Schema + strict (für OpenRouter structured outputs).
|
WP-22/JSON: Optionales JSON-Schema + strict (für OpenRouter structured outputs).
|
||||||
FIX: Intelligente Rate-Limit Erkennung (429 Handling), v1-API Sync & Timeouts.
|
FIX: Intelligente Rate-Limit Erkennung (429 Handling), v1-API Sync & Timeouts.
|
||||||
VERSION: 3.3.9
|
WP-25: Integration der DecisionEngine für Agentic Multi-Stream RAG.
|
||||||
|
VERSION: 3.4.1
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
FIX:
|
FIX:
|
||||||
- Importiert clean_llm_text von app.core.registry zur Vermeidung von Circular Imports.
|
- 100% Wiederherstellung der v3.3.9 Logik (Rate-Limits, Retries, Async-Threads).
|
||||||
- Wendet clean_llm_text auf Text-Antworten in generate_raw_response an.
|
- Integration des WP-25 DecisionEngine Bridges in generate_rag_response.
|
||||||
|
- WP-25 Empty-Response-Guard für Cloud-Provider.
|
||||||
"""
|
"""
|
||||||
import httpx
|
import httpx
|
||||||
import yaml
|
import yaml
|
||||||
|
|
@ -29,7 +31,6 @@ from app.core.registry import clean_llm_text
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class LLMService:
|
class LLMService:
|
||||||
# GLOBALER SEMAPHOR für Hintergrund-Last Steuerung (WP-06)
|
# GLOBALER SEMAPHOR für Hintergrund-Last Steuerung (WP-06)
|
||||||
_background_semaphore = None
|
_background_semaphore = None
|
||||||
|
|
@ -37,6 +38,9 @@ class LLMService:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.settings = get_settings()
|
self.settings = get_settings()
|
||||||
self.prompts = self._load_prompts()
|
self.prompts = self._load_prompts()
|
||||||
|
|
||||||
|
# WP-25: Lazy Initialization der DecisionEngine zur Vermeidung von Circular Imports
|
||||||
|
self._decision_engine = None
|
||||||
|
|
||||||
# Initialisiere Semaphore einmalig auf Klassen-Ebene
|
# Initialisiere Semaphore einmalig auf Klassen-Ebene
|
||||||
if LLMService._background_semaphore is None:
|
if LLMService._background_semaphore is None:
|
||||||
|
|
@ -71,6 +75,14 @@ class LLMService:
|
||||||
)
|
)
|
||||||
logger.info("🛰️ LLMService: OpenRouter Integration active.")
|
logger.info("🛰️ LLMService: OpenRouter Integration active.")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def decision_engine(self):
|
||||||
|
"""Lazy Initialization der Decision Engine (WP-25)."""
|
||||||
|
if self._decision_engine is None:
|
||||||
|
from app.core.retrieval.decision_engine import DecisionEngine
|
||||||
|
self._decision_engine = DecisionEngine()
|
||||||
|
return self._decision_engine
|
||||||
|
|
||||||
def _load_prompts(self) -> dict:
|
def _load_prompts(self) -> dict:
|
||||||
"""Lädt die Prompt-Konfiguration aus der YAML-Datei."""
|
"""Lädt die Prompt-Konfiguration aus der YAML-Datei."""
|
||||||
path = Path(self.settings.PROMPTS_PATH)
|
path = Path(self.settings.PROMPTS_PATH)
|
||||||
|
|
@ -132,14 +144,18 @@ class LLMService:
|
||||||
max_retries, base_delay, model_override,
|
max_retries, base_delay, model_override,
|
||||||
json_schema, json_schema_name, strict_json_schema
|
json_schema, json_schema_name, strict_json_schema
|
||||||
)
|
)
|
||||||
# WP-14 Fix: Bereinige Text-Antworten vor Rückgabe
|
else:
|
||||||
return clean_llm_text(res) if not force_json else res
|
res = await self._dispatch(
|
||||||
|
target_provider, prompt, system, force_json,
|
||||||
|
max_retries, base_delay, model_override,
|
||||||
|
json_schema, json_schema_name, strict_json_schema
|
||||||
|
)
|
||||||
|
|
||||||
|
# WP-25 Empty Response Fix: Wenn Cloud-Provider leer antworten, Fallback auf Ollama
|
||||||
|
if (not res or len(res.strip()) < 5) and target_provider != "ollama":
|
||||||
|
logger.warning(f"⚠️ [WP-25] Empty response from {target_provider}. Falling back to OLLAMA.")
|
||||||
|
res = await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
|
||||||
|
|
||||||
res = await self._dispatch(
|
|
||||||
target_provider, prompt, system, force_json,
|
|
||||||
max_retries, base_delay, model_override,
|
|
||||||
json_schema, json_schema_name, strict_json_schema
|
|
||||||
)
|
|
||||||
# WP-14 Fix: Bereinige Text-Antworten vor Rückgabe
|
# WP-14 Fix: Bereinige Text-Antworten vor Rückgabe
|
||||||
return clean_llm_text(res) if not force_json else res
|
return clean_llm_text(res) if not force_json else res
|
||||||
|
|
||||||
|
|
@ -295,21 +311,16 @@ class LLMService:
|
||||||
logger.warning(f"⚠️ Ollama attempt {attempt} failed. Retrying in {wait_time}s...")
|
logger.warning(f"⚠️ Ollama attempt {attempt} failed. Retrying in {wait_time}s...")
|
||||||
await asyncio.sleep(wait_time)
|
await asyncio.sleep(wait_time)
|
||||||
|
|
||||||
async def generate_rag_response(self, query: str, context_str: str) -> str:
|
async def generate_rag_response(self, query: str, context_str: Optional[str] = None) -> str:
|
||||||
"""Vollständiges RAG Chat-Interface."""
|
"""
|
||||||
provider = self.settings.MINDNET_LLM_PROVIDER
|
WP-25 UPDATE: Der primäre Einstiegspunkt für den MindNet Chat.
|
||||||
system_prompt = self.get_prompt("system_prompt", provider)
|
Delegiert nun an die DecisionEngine für Agentic Multi-Stream RAG.
|
||||||
rag_template = self.get_prompt("rag_template", provider)
|
Falls context_str bereits vorhanden ist (Legacy), wird dieser ignoriert zugunsten
|
||||||
|
der präzisen Multi-Stream Orchestrierung.
|
||||||
final_prompt = rag_template.format(context_str=context_str, query=query)
|
"""
|
||||||
|
logger.info(f"🚀 [WP-25] Chat Query intercepted: {query[:50]}...")
|
||||||
# RAG Aufrufe im Chat nutzen nun standardmäßig max_retries=2 (überschreibbar)
|
# Die DecisionEngine übernimmt nun das gesamte Management (Routing, Retrieval, Synthesis)
|
||||||
# Durch den Aufruf von generate_raw_response wird die Bereinigung automatisch angewendet.
|
return await self.decision_engine.ask(query)
|
||||||
return await self.generate_raw_response(
|
|
||||||
final_prompt,
|
|
||||||
system=system_prompt,
|
|
||||||
priority="realtime"
|
|
||||||
)
|
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
"""Schließt die HTTP-Verbindungen."""
|
"""Schließt die HTTP-Verbindungen."""
|
||||||
|
|
|
||||||
|
|
@ -1,145 +1,112 @@
|
||||||
# config/decision_engine.yaml
|
# config/decision_engine.yaml
|
||||||
# Steuerung der Decision Engine (Intent Recognition & Graph Routing)
|
# VERSION: 3.1.2 (WP-25: Multi-Stream Agentic RAG)
|
||||||
# VERSION: 2.6.1 (WP-20: Hybrid LLM & WP-22: Semantic Graph Routing)
|
|
||||||
# STATUS: Active
|
# STATUS: Active
|
||||||
# DoD: Keine Hardcoded Modelle, volle Integration der strategischen Boosts.
|
# DoD: Strikte Trennung von Logik und Instruktion. Prompt in prompts.yaml verschoben.
|
||||||
|
|
||||||
version: 2.6
|
version: 3.1
|
||||||
|
|
||||||
settings:
|
settings:
|
||||||
llm_fallback_enabled: true
|
llm_fallback_enabled: true
|
||||||
|
router_provider: "auto"
|
||||||
# Strategie für den Router selbst (Welches Modell erkennt den Intent?)
|
# Der Prompt-Key für den Router in prompts.yaml
|
||||||
# "auto" nutzt den in MINDNET_LLM_PROVIDER gesetzten Standard (z.B. openrouter).
|
router_prompt_key: "intent_router_v1"
|
||||||
router_provider: "auto"
|
|
||||||
|
|
||||||
# Few-Shot Prompting für den LLM-Router
|
# --- EBENE 1: STREAM-LIBRARY (Bausteine) ---
|
||||||
llm_router_prompt: |
|
streams_library:
|
||||||
Du bist der zentrale Intent-Klassifikator für Mindnet, einen digitalen Zwilling.
|
values_stream:
|
||||||
Analysiere die Nachricht und wähle die passende Strategie.
|
name: "Identität & Ethik"
|
||||||
Antworte NUR mit dem Namen der Strategie.
|
query_template: "Welche meiner Werte und Prinzipien betreffen: {query}"
|
||||||
|
filter_types: ["value", "principle", "belief"]
|
||||||
STRATEGIEN:
|
top_k: 5
|
||||||
- INTERVIEW: User will Wissen erfassen, Notizen anlegen oder Dinge festhalten.
|
edge_boosts:
|
||||||
- DECISION: Rat, Strategie, Abwägung von Werten, "Soll ich tun X?".
|
guides: 3.0
|
||||||
- EMPATHY: Gefühle, Reflexion der eigenen Verfassung, Frust, Freude.
|
enforced_by: 2.5
|
||||||
- CODING: Code-Erstellung, Debugging, technische Dokumentation.
|
based_on: 2.0
|
||||||
- FACT: Reine Wissensabfrage, Definitionen, Suchen von Informationen.
|
|
||||||
|
|
||||||
BEISPIELE:
|
|
||||||
User: "Wie funktioniert die Qdrant-Vektor-DB?" -> FACT
|
|
||||||
User: "Soll ich mein Startup jetzt verkaufen?" -> DECISION
|
|
||||||
User: "Notiere mir kurz meine Gedanken zum Meeting." -> INTERVIEW
|
|
||||||
User: "Ich fühle mich heute sehr erschöpft." -> EMPATHY
|
|
||||||
User: "Schreibe eine FastAPI-Route für den Ingest." -> CODING
|
|
||||||
|
|
||||||
NACHRICHT: "{query}"
|
|
||||||
|
|
||||||
STRATEGIE:
|
|
||||||
|
|
||||||
strategies:
|
facts_stream:
|
||||||
# 1. Fakten-Abfrage (Turbo-Modus via OpenRouter / Primary)
|
name: "Operative Realität"
|
||||||
FACT:
|
query_template: "Status, Ressourcen und Fakten zu: {query}"
|
||||||
description: "Reine Wissensabfrage."
|
filter_types: ["project", "decision", "resource", "task", "milestone"]
|
||||||
preferred_provider: "openrouter"
|
top_k: 5
|
||||||
trigger_keywords: []
|
|
||||||
inject_types: []
|
|
||||||
# WP-22: Definitionen & Hierarchien im Graphen bevorzugen
|
|
||||||
edge_boosts:
|
edge_boosts:
|
||||||
part_of: 2.0
|
part_of: 2.0
|
||||||
composed_of: 2.0
|
depends_on: 1.5
|
||||||
similar_to: 1.5
|
implemented_in: 1.5
|
||||||
caused_by: 0.5
|
|
||||||
prompt_template: "rag_template"
|
|
||||||
prepend_instruction: null
|
|
||||||
|
|
||||||
# 2. Entscheidungs-Frage (Power-Strategie via Gemini)
|
biography_stream:
|
||||||
DECISION:
|
name: "Persönliche Erfahrung"
|
||||||
description: "Der User sucht Rat, Strategie oder Abwägung."
|
query_template: "Welche Erlebnisse habe ich im Kontext von {query} gemacht?"
|
||||||
preferred_provider: "gemini"
|
filter_types: ["experience", "journal"]
|
||||||
trigger_keywords:
|
top_k: 3
|
||||||
- "soll ich"
|
edge_boosts:
|
||||||
- "meinung"
|
related_to: 1.5
|
||||||
- "besser"
|
experienced_in: 2.0
|
||||||
- "empfehlung"
|
|
||||||
- "strategie"
|
risk_stream:
|
||||||
- "entscheidung"
|
name: "Risiko-Radar"
|
||||||
- "abwägung"
|
query_template: "Gefahren, Hindernisse oder Risiken bei: {query}"
|
||||||
- "vergleich"
|
filter_types: ["risk", "obstacle"]
|
||||||
inject_types: ["value", "principle", "goal", "risk"]
|
top_k: 3
|
||||||
# WP-22: Risiken und Konsequenzen im Graphen priorisieren
|
|
||||||
edge_boosts:
|
edge_boosts:
|
||||||
blocks: 2.5
|
blocks: 2.5
|
||||||
solves: 2.0
|
|
||||||
depends_on: 1.5
|
|
||||||
risk_of: 2.5
|
|
||||||
impacts: 2.0
|
impacts: 2.0
|
||||||
prompt_template: "decision_template"
|
risk_of: 2.5
|
||||||
prepend_instruction: |
|
|
||||||
!!! ENTSCHEIDUNGS-MODUS (HYBRID AI) !!!
|
|
||||||
BITTE WÄGE FAKTEN GEGEN FOLGENDE WERTE, PRINZIPIEN UND ZIELE AB:
|
|
||||||
|
|
||||||
# 3. Empathie / "Ich"-Modus (Lokal & Privat via Ollama)
|
tech_stream:
|
||||||
EMPATHY:
|
name: "Technische Referenz"
|
||||||
description: "Reaktion auf emotionale Zustände."
|
query_template: "Technische Dokumentation und Code-Beispiele für: {query}"
|
||||||
preferred_provider: "openrouter"
|
filter_types: ["snippet", "reference", "source"]
|
||||||
trigger_keywords:
|
top_k: 5
|
||||||
- "ich fühle"
|
|
||||||
- "traurig"
|
|
||||||
- "glücklich"
|
|
||||||
- "gestresst"
|
|
||||||
- "angst"
|
|
||||||
- "nervt"
|
|
||||||
- "überfordert"
|
|
||||||
- "müde"
|
|
||||||
inject_types: ["experience", "belief", "profile"]
|
|
||||||
edge_boosts:
|
|
||||||
based_on: 2.0
|
|
||||||
related_to: 2.0
|
|
||||||
experienced_in: 2.5
|
|
||||||
blocks: 0.1
|
|
||||||
prompt_template: "empathy_template"
|
|
||||||
prepend_instruction: null
|
|
||||||
|
|
||||||
# 4. Coding / Technical (Gemini Power)
|
|
||||||
CODING:
|
|
||||||
description: "Technische Anfragen und Programmierung."
|
|
||||||
preferred_provider: "gemini"
|
|
||||||
trigger_keywords:
|
|
||||||
- "code"
|
|
||||||
- "python"
|
|
||||||
- "script"
|
|
||||||
- "funktion"
|
|
||||||
- "bug"
|
|
||||||
- "syntax"
|
|
||||||
- "json"
|
|
||||||
- "yaml"
|
|
||||||
- "bash"
|
|
||||||
inject_types: ["snippet", "reference", "source"]
|
|
||||||
# WP-22: Technische Abhängigkeiten priorisieren
|
|
||||||
edge_boosts:
|
edge_boosts:
|
||||||
uses: 2.5
|
uses: 2.5
|
||||||
depends_on: 2.0
|
|
||||||
implemented_in: 3.0
|
implemented_in: 3.0
|
||||||
prompt_template: "technical_template"
|
|
||||||
prepend_instruction: null
|
|
||||||
|
|
||||||
# 5. Interview / Datenerfassung (Lokal)
|
# --- EBENE 2: STRATEGIEN (Orchestrierung) ---
|
||||||
|
strategies:
|
||||||
|
FACT_WHEN:
|
||||||
|
description: "Abfrage von Zeitpunkten und Historie."
|
||||||
|
preferred_provider: "openrouter"
|
||||||
|
use_streams:
|
||||||
|
- "facts_stream"
|
||||||
|
- "biography_stream"
|
||||||
|
prompt_template: "fact_synthesis_v1"
|
||||||
|
|
||||||
|
FACT_WHAT:
|
||||||
|
description: "Abfrage von Definitionen und Wissen."
|
||||||
|
preferred_provider: "openrouter"
|
||||||
|
use_streams:
|
||||||
|
- "facts_stream"
|
||||||
|
- "tech_stream"
|
||||||
|
prompt_template: "fact_synthesis_v1"
|
||||||
|
|
||||||
|
DECISION:
|
||||||
|
description: "Der User sucht Rat, Strategie oder Abwägung."
|
||||||
|
preferred_provider: "gemini"
|
||||||
|
use_streams:
|
||||||
|
- "values_stream"
|
||||||
|
- "facts_stream"
|
||||||
|
- "risk_stream"
|
||||||
|
prompt_template: "decision_synthesis_v1"
|
||||||
|
prepend_instruction: "!!! ENTSCHEIDUNGS-MODUS (AGENTIC MULTI-STREAM) !!!"
|
||||||
|
|
||||||
|
EMPATHY:
|
||||||
|
description: "Reaktion auf emotionale Zustände."
|
||||||
|
preferred_provider: "openrouter"
|
||||||
|
use_streams:
|
||||||
|
- "biography_stream"
|
||||||
|
- "values_stream"
|
||||||
|
prompt_template: "empathy_template"
|
||||||
|
|
||||||
|
CODING:
|
||||||
|
description: "Technische Anfragen und Programmierung."
|
||||||
|
preferred_provider: "gemini"
|
||||||
|
use_streams:
|
||||||
|
- "tech_stream"
|
||||||
|
- "facts_stream"
|
||||||
|
prompt_template: "technical_template"
|
||||||
|
|
||||||
INTERVIEW:
|
INTERVIEW:
|
||||||
description: "Der User möchte Wissen erfassen."
|
description: "Der User möchte Wissen erfassen."
|
||||||
preferred_provider: "openrouter"
|
preferred_provider: "openrouter"
|
||||||
trigger_keywords:
|
use_streams: []
|
||||||
- "neue notiz"
|
prompt_template: "interview_template"
|
||||||
- "etwas notieren"
|
|
||||||
- "festhalten"
|
|
||||||
- "erstellen"
|
|
||||||
- "dokumentieren"
|
|
||||||
- "anlegen"
|
|
||||||
- "interview"
|
|
||||||
- "erfassen"
|
|
||||||
- "idee speichern"
|
|
||||||
- "draft"
|
|
||||||
inject_types: []
|
|
||||||
edge_boosts: {}
|
|
||||||
prompt_template: "interview_template"
|
|
||||||
prepend_instruction: null
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
# config/prompts.yaml — Final V2.6.0 (WP-15b Candidate-Validation)
|
# config/prompts.yaml — VERSION 3.0.0 (WP-25: Multi-Stream Agentic RAG)
|
||||||
# WP-20: Optimierte Cloud-Templates zur Unterdrückung von Modell-Geschwätz.
|
# WP-20/22: Cloud-Templates & Semantic Graph Routing erhalten.
|
||||||
# FIX: Explizite Verbote für Einleitungstexte zur Vermeidung von JSON-Parsing-Fehlern.
|
# WP-25: Integration der Multi-Stream Synthese zur Vermeidung von Halluzinationen.
|
||||||
# WP-15b: Integration der binären edge_validation für den Two-Pass Workflow.
|
|
||||||
# OLLAMA: UNVERÄNDERT laut Benutzeranweisung.
|
# OLLAMA: UNVERÄNDERT laut Benutzeranweisung.
|
||||||
|
|
||||||
system_prompt: |
|
system_prompt: |
|
||||||
|
|
@ -270,4 +269,88 @@ edge_validation:
|
||||||
QUELLE: {chunk_text}
|
QUELLE: {chunk_text}
|
||||||
ZIEL: {target_title} ({target_summary})
|
ZIEL: {target_title} ({target_summary})
|
||||||
BEZIEHUNG: {edge_kind}
|
BEZIEHUNG: {edge_kind}
|
||||||
Ist diese Verbindung valide? Antworte NUR mit YES oder NO.
|
Ist diese Verbindung valide? Antworte NUR mit YES oder NO.
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# 9. WP-25: MULTI-STREAM SYNTHESIS (Intent: SYNTHESIS)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Diese Templates verarbeiten die Ergebnisse aus parallelen Such-Streams.
|
||||||
|
|
||||||
|
decision_synthesis_v1:
|
||||||
|
gemini: |
|
||||||
|
Agiere als mein strategischer Partner. Analysiere die Frage: {query}
|
||||||
|
|
||||||
|
Hier sind die Ergebnisse aus verschiedenen Wissens-Streams meiner Mindnet-Basis:
|
||||||
|
|
||||||
|
### STREAM: WERTE & PRINZIPIEN (Identität)
|
||||||
|
{values_stream}
|
||||||
|
|
||||||
|
### STREAM: OPERATIVE FAKTEN (Realität)
|
||||||
|
{facts_stream}
|
||||||
|
|
||||||
|
### STREAM: RISIKO-ANALYSE (Konsequenzen)
|
||||||
|
{risk_stream}
|
||||||
|
|
||||||
|
AUFGABE:
|
||||||
|
1. Fasse die Faktenlage kurz zusammen.
|
||||||
|
2. Wäge die Fakten hart gegen meine Werte ab. Gibt es Konflikte?
|
||||||
|
3. Beurteile das Vorhaben basierend auf dem Risiko-Radar.
|
||||||
|
4. Gib eine klare strategische Empfehlung ab.
|
||||||
|
openrouter: |
|
||||||
|
Strategische Multi-Stream Analyse für: {query}
|
||||||
|
Werte-Basis: {values_stream}
|
||||||
|
Fakten: {facts_stream}
|
||||||
|
Risiken: {risk_stream}
|
||||||
|
Bitte wäge ab und gib eine Empfehlung.
|
||||||
|
ollama: |
|
||||||
|
Du bist mein Entscheidungs-Partner. Analysiere {query} basierend auf diesen Streams:
|
||||||
|
WERTE: {values_stream}
|
||||||
|
FAKTEN: {facts_stream}
|
||||||
|
RISIKEN: {risk_stream}
|
||||||
|
Wäge die Fakten gegen die Werte ab und nenne potenzielle Risiken. Nenne Quellen!
|
||||||
|
|
||||||
|
fact_synthesis_v1:
|
||||||
|
gemini: |
|
||||||
|
Beantworte die Wissensabfrage "{query}" basierend auf diesen Streams:
|
||||||
|
FAKTEN: {facts_stream}
|
||||||
|
BIOGRAFIE/ERFAHRUNG: {biography_stream}
|
||||||
|
TECHNIK: {tech_stream}
|
||||||
|
Kombiniere harte Fakten mit persönlichen Erfahrungen, falls vorhanden.
|
||||||
|
openrouter: |
|
||||||
|
Synthese der Wissens-Streams für: {query}
|
||||||
|
Inhalt: {facts_stream} | {biography_stream} | {tech_stream}
|
||||||
|
ollama: |
|
||||||
|
Fasse das Wissen zu {query} zusammen.
|
||||||
|
QUELLE FAKTEN: {facts_stream}
|
||||||
|
QUELLE ERFAHRUNG: {biography_stream}
|
||||||
|
QUELLE TECHNIK: {tech_stream}
|
||||||
|
Antworte präzise und nenne die Quellen.
|
||||||
|
|
||||||
|
# ... (Vorherige Sektionen 1-9 bleiben identisch)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# 10. WP-25: INTENT ROUTING (Intent: CLASSIFY)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
intent_router_v1:
|
||||||
|
ollama: |
|
||||||
|
Analysiere die Nutzeranfrage und wähle die passende Strategie.
|
||||||
|
Antworte NUR mit dem Namen der Strategie.
|
||||||
|
|
||||||
|
STRATEGIEN:
|
||||||
|
- FACT_WHEN: Fragen nach "Wann", Daten, Historie.
|
||||||
|
- FACT_WHAT: Fragen nach "Was", Definitionen, Wissen.
|
||||||
|
- DECISION: Rat, Meinung, "Soll ich?", Abwägung.
|
||||||
|
- EMPATHY: Emotionen, Reflexion, "Ich fühle mich...".
|
||||||
|
- CODING: Programmierung, Skripte, Debugging.
|
||||||
|
- INTERVIEW: Dokumentation von Gedanken, Notizen erstellen.
|
||||||
|
|
||||||
|
NACHRICHT: "{query}"
|
||||||
|
STRATEGIE:
|
||||||
|
gemini: |
|
||||||
|
Classify query intent for Mindnet. Options: [FACT_WHEN, FACT_WHAT, DECISION, EMPATHY, CODING, INTERVIEW].
|
||||||
|
Query: "{query}"
|
||||||
|
Result (One word only):
|
||||||
|
openrouter: |
|
||||||
|
Select the best Mindnet strategy for: "{query}".
|
||||||
|
Strategies: FACT_WHEN, FACT_WHAT, DECISION, EMPATHY, CODING, INTERVIEW.
|
||||||
|
Response:
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
# Release Notes: Mindnet v2.9.1 (WP15c)
|
# Release Notes: Mindnet v2.9.3 (WP15c)
|
||||||
|
|
||||||
**Release Date:** 2025-12-31
|
**Release Date:** 2025-12-31
|
||||||
**Type:** Feature Release - Multigraph & Diversity Engine
|
**Type:** Feature Release - Multigraph & Diversity Engine
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user