Update main application and services for WP-25 release, introducing Agentic Multi-Stream RAG capabilities. Enhance lifespan management, global error handling, and integrate LLMService with DecisionEngine for improved retrieval and synthesis. Update dependencies and versioning across modules, ensuring compatibility with new multi-stream architecture. Refactor chat router to support new intent classification and retrieval strategies, while maintaining stability and performance improvements.

This commit is contained in:
Lars 2026-01-01 07:52:41 +01:00
parent 67d7154328
commit 008167268f
8 changed files with 647 additions and 447 deletions

View File

@ -0,0 +1,208 @@
"""
FILE: app/core/retrieval/decision_engine.py
DESCRIPTION: Der Agentic Orchestrator für WP-25.
Realisiert Multi-Stream Retrieval, Intent-basiertes Routing
und parallele Wissens-Synthese.
VERSION: 1.0.1
STATUS: Active
FIX:
- Behebung eines potenziellen KeyError bei fehlender 'FACT_WHAT' Strategie (Fallback-Resilienz).
- Einführung einer mehrstufigen Sicherheitskaskade für die Strategiewahl.
"""
import asyncio
import logging
import yaml
import os
from typing import List, Dict, Any, Optional
# Core & Service Imports
from app.models.dto import QueryRequest, QueryResponse
from app.core.retrieval.retriever import Retriever
from app.services.llm_service import LLMService
from app.config import get_settings
logger = logging.getLogger(__name__)
class DecisionEngine:
    """
    Agentic orchestrator for WP-25.

    Routes an incoming query to a retrieval strategy (intent routing),
    executes the strategy's knowledge streams in parallel and synthesises
    the stream results into a single answer via the LLM service.
    """

    def __init__(self):
        """Initialise the engine and load the modular configurations."""
        self.settings = get_settings()
        self.retriever = Retriever()
        self.llm_service = LLMService()
        self.config = self._load_engine_config()

    def _load_engine_config(self) -> Dict[str, Any]:
        """
        Load the multi-stream configuration (WP-25).

        Returns an empty strategy map on any failure so the engine can
        still start and surface a clear error at query time.
        """
        path = os.getenv("MINDNET_DECISION_CONFIG", "config/decision_engine.yaml")
        if not os.path.exists(path):
            logger.error(f"❌ Decision Engine Config not found at {path}")
            return {"strategies": {}}
        try:
            with open(path, "r", encoding="utf-8") as f:
                return yaml.safe_load(f) or {}
        except Exception as e:
            logger.error(f"❌ Failed to load decision_engine.yaml: {e}")
            return {"strategies": {}}

    async def ask(self, query: str) -> str:
        """
        Main entry point of the MindNet chat.

        Orchestrates the whole pipeline: routing -> retrieval -> synthesis.
        """
        # 1. Intent recognition (which tool do we need?)
        strategy_key = await self._determine_strategy(query)

        # Multi-stage safety cascade for strategy selection.
        strategies = self.config.get("strategies", {})
        strategy = strategies.get(strategy_key)
        if not strategy:
            logger.warning(f"⚠️ Unknown strategy '{strategy_key}'. Attempting fallback to FACT_WHAT.")
            strategy_key = "FACT_WHAT"
            strategy = strategies.get("FACT_WHAT")
        # WP-25 FIX: if FACT_WHAT is missing as well, pick the first available strategy.
        if not strategy and strategies:
            strategy_key = next(iter(strategies))
            strategy = strategies[strategy_key]
            logger.warning(f"⚠️ 'FACT_WHAT' missing in config. Using first available: {strategy_key}")
        # Last resort: no strategies configured at all.
        if not strategy:
            logger.error("❌ CRITICAL: No strategies defined in decision_engine.yaml!")
            return "Entschuldigung, meine Wissensbasis ist aktuell nicht konfiguriert."

        # 2. Multi-stream retrieval (gather knowledge in parallel).
        stream_results = await self._execute_parallel_streams(strategy, query)
        # 3. Synthesis (weave the results into one answer).
        return await self._generate_final_answer(strategy_key, strategy, query, stream_results)

    async def _determine_strategy(self, query: str) -> str:
        """Use the LLM router to dynamically pick a search strategy key."""
        prompt_key = self.config.get("settings", {}).get("router_prompt_key", "intent_router_v1")
        router_prompt_template = self.llm_service.get_prompt(prompt_key)
        if not router_prompt_template:
            # No router prompt configured -> default strategy key.
            return "FACT_WHAT"
        full_prompt = router_prompt_template.format(query=query)
        try:
            response = await self.llm_service.generate_raw_response(
                full_prompt,
                max_retries=1,
                priority="realtime"
            )
            # The raw LLM output is normalised to an upper-case strategy key;
            # ask() validates it against the configured strategies.
            return str(response).strip().upper()
        except Exception as e:
            logger.error(f"Strategy Routing failed: {e}")
            return "FACT_WHAT"

    async def _execute_parallel_streams(self, strategy: Dict, query: str) -> Dict[str, str]:
        """Run all search streams defined by the strategy concurrently."""
        stream_keys = strategy.get("use_streams", [])
        library = self.config.get("streams_library", {})
        tasks = []
        active_streams = []
        for key in stream_keys:
            stream_cfg = library.get(key)
            if stream_cfg:
                active_streams.append(key)
                tasks.append(self._run_single_stream(key, stream_cfg, query))
        # return_exceptions=True keeps one failing stream from killing the rest.
        results = await asyncio.gather(*tasks, return_exceptions=True)
        mapped_results = {}
        for name, res in zip(active_streams, results):
            if isinstance(res, Exception):
                logger.error(f"Stream '{name}' failed: {res}")
                mapped_results[name] = "[Fehler beim Abruf dieses Wissens-Streams]"
            else:
                mapped_results[name] = self._format_stream_context(res)
        return mapped_results

    async def _run_single_stream(self, name: str, cfg: Dict, query: str) -> "QueryResponse":
        """Prepare and execute one specialised stream search."""
        transformed_query = cfg.get("query_template", "{query}").format(query=query)
        request = QueryRequest(
            query=transformed_query,
            top_k=cfg.get("top_k", 5),
            filters={"type": cfg.get("filter_types", [])},
            expand={"depth": 1},
            boost_edges=cfg.get("edge_boosts", {}),
            explain=True
        )
        return await self.retriever.search(request)

    def _format_stream_context(self, response: "QueryResponse") -> str:
        """Convert QueryHits into a compact context string for the LLM."""
        if not response.results:
            return "Keine spezifischen Informationen in diesem Stream gefunden."
        lines = []
        for i, hit in enumerate(response.results, 1):
            source = hit.source.get("path", "Unbekannt")
            content = hit.source.get("text", "").strip()
            lines.append(f"[{i}] QUELLE: {source}\nINHALT: {content}")
        return "\n\n".join(lines)

    async def _generate_final_answer(
        self,
        strategy_key: str,
        strategy: Dict,
        query: str,
        stream_results: Dict[str, str]
    ) -> str:
        """
        Perform the multi-stream synthesis.

        Formats the strategy's prompt template with the stream results and
        asks the LLM for the final answer. Falls back to a plain
        concatenation prompt when the template is missing or its
        placeholders do not match the stream names.
        """
        provider = strategy.get("preferred_provider") or self.settings.MINDNET_LLM_PROVIDER
        template_key = strategy.get("prompt_template", "rag_template")
        template = self.llm_service.get_prompt(template_key, provider=provider)
        system_prompt = self.llm_service.get_prompt("system_prompt", provider=provider)
        template_vars = {**stream_results, "query": query}
        prepend = strategy.get("prepend_instruction", "")
        # FIX: a missing template previously raised AttributeError on .format()
        # and surfaced as a generic failure; use the plain fallback instead.
        if not template:
            logger.error(f"Prompt template '{template_key}' not found. Using plain fallback.")
            return await self._synthesize_fallback(query, stream_results, system_prompt)
        try:
            # Safety check: all required placeholders must exist in the template;
            # a mismatch raises KeyError and triggers the fallback below.
            final_prompt = template.format(**template_vars)
            if prepend:
                final_prompt = f"{prepend}\n\n{final_prompt}"
            response = await self.llm_service.generate_raw_response(
                final_prompt,
                system=system_prompt,
                provider=provider,
                priority="realtime"
            )
            # Empty / near-empty answer (silent refusal) -> retry once on ollama.
            if not response or len(response.strip()) < 5:
                return await self.llm_service.generate_raw_response(
                    final_prompt,
                    system=system_prompt,
                    provider="ollama",
                    priority="realtime"
                )
            return response
        except KeyError as e:
            logger.error(f"Template Variable mismatch in '{template_key}': Missing {e}")
            return await self._synthesize_fallback(query, stream_results, system_prompt)
        except Exception as e:
            logger.error(f"Final Synthesis failed: {e}")
            return "Ich konnte keine Antwort generieren."

    async def _synthesize_fallback(self, query: str, stream_results: Dict[str, str], system_prompt) -> str:
        """Fallback synthesis: simply concatenate the collected stream contexts."""
        fallback_context = "\n\n".join(stream_results.values())
        return await self.llm_service.generate_raw_response(
            f"Beantworte: {query}\n\nKontext:\n{fallback_context}",
            system=system_prompt,
            priority="realtime"
        )

View File

@ -1,25 +1,28 @@
"""
FILE: app/main.py
DESCRIPTION: Bootstrap der FastAPI Anwendung. Inkludiert Router und Middleware.
VERSION: 0.6.0
DESCRIPTION: Bootstrap der FastAPI Anwendung für WP-25 (Agentic RAG).
Orchestriert Lifespan-Events, globale Fehlerbehandlung und Routing.
VERSION: 1.0.0 (WP-25 Release)
STATUS: Active
DEPENDENCIES: app.config, app.routers.* (embed, qdrant, query, graph, tools, feedback, chat, ingest, admin)
LAST_ANALYSIS: 2025-12-15
DEPENDENCIES: app.config, app.routers.*, app.services.llm_service
"""
from __future__ import annotations
from fastapi import FastAPI
from .config import get_settings
#from .routers.embed_router import router as embed_router
#from .routers.qdrant_router import router as qdrant_router
import logging
import os
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from .config import get_settings
from .services.llm_service import LLMService
# Import der Router
from .routers.query import router as query_router
from .routers.graph import router as graph_router
from .routers.tools import router as tools_router
from .routers.feedback import router as feedback_router
# NEU: Chat Router (WP-05)
from .routers.chat import router as chat_router
# NEU: Ingest Router (WP-11)
from .routers.ingest import router as ingest_router
try:
@ -27,26 +30,81 @@ try:
except Exception:
admin_router = None
logger = logging.getLogger(__name__)
# --- WP-25: Lifespan Management ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Manage the application's lifecycle.

    Runs startup sanity checks before the app serves traffic and cleans up
    resources on shutdown. Missing config files are logged as critical but
    do not abort startup.
    """
    settings = get_settings()
    logger.info("🚀 mindnet API: Starting up (WP-25 Agentic RAG Mode)...")
    # 1. Startup: integrity check of the WP-25 configuration.
    # Verify that the files critical to the DecisionEngine are present.
    decision_cfg = os.getenv("MINDNET_DECISION_CONFIG", "config/decision_engine.yaml")
    prompts_cfg = settings.PROMPTS_PATH
    if not os.path.exists(decision_cfg):
        logger.error(f"❌ CRITICAL: Decision Engine config missing at {decision_cfg}")
    if not os.path.exists(prompts_cfg):
        logger.error(f"❌ CRITICAL: Prompts config missing at {prompts_cfg}")
    yield
    # 2. Shutdown: release resources.
    # NOTE(review): this constructs a *new* LLMService and closes it — that only
    # releases shared resources if LLMService is a singleton / shares its
    # connections process-wide; verify against llm_service.py.
    logger.info("🛑 mindnet API: Shutting down...")
    llm = LLMService()
    await llm.close()
    logger.info("✨ Cleanup complete. Goodbye.")
# --- App Factory ---
def create_app() -> FastAPI:
app = FastAPI(title="mindnet API", version="0.6.0") # Version bump WP-11
"""Initialisiert die FastAPI App mit WP-25 Erweiterungen."""
app = FastAPI(
title="mindnet API",
version="1.0.0", # WP-25 Milestone
lifespan=lifespan,
description="Digital Twin Knowledge Engine mit Agentic Multi-Stream RAG."
)
s = get_settings()
    # --- Global error handling (WP-25 resilience) ---
    @app.exception_handler(Exception)
    async def global_exception_handler(request: Request, exc: Exception):
        """Catch unexpected errors from the multi-stream chain and return a JSON 500."""
        # exc_info=True preserves the full traceback in the server log while the
        # client only sees a generic message plus the exception class name.
        logger.error(f"❌ Unhandled Engine Error: {exc}", exc_info=True)
        return JSONResponse(
            status_code=500,
            content={
                "detail": "Ein interner Fehler ist aufgetreten. Die DecisionEngine konnte die Anfrage nicht finalisieren.",
                "error_type": type(exc).__name__
            }
        )
# Healthcheck
@app.get("/healthz")
def healthz():
return {"status": "ok", "qdrant": s.QDRANT_URL, "prefix": s.COLLECTION_PREFIX}
# app.include_router(embed_router)
# app.include_router(qdrant_router)
return {
"status": "ok",
"version": "1.0.0",
"qdrant": s.QDRANT_URL,
"prefix": s.COLLECTION_PREFIX,
"agentic_mode": True
}
# Inkludieren der Router (100% Kompatibilität erhalten)
app.include_router(query_router, prefix="/query", tags=["query"])
app.include_router(graph_router, prefix="/graph", tags=["graph"])
app.include_router(tools_router, prefix="/tools", tags=["tools"])
app.include_router(feedback_router, prefix="/feedback", tags=["feedback"])
# NEU: Chat Endpoint
app.include_router(chat_router, prefix="/chat", tags=["chat"])
# NEU: Ingest Endpoint
app.include_router(chat_router, prefix="/chat", tags=["chat"]) # Nutzt nun WP-25 DecisionEngine
app.include_router(ingest_router, prefix="/ingest", tags=["ingest"])
if admin_router:
@ -54,4 +112,5 @@ def create_app() -> FastAPI:
return app
# Instanziierung der App
app = create_app()

View File

@ -1,10 +1,9 @@
"""
FILE: app/models/dto.py
DESCRIPTION: Pydantic-Modelle (DTOs) für Request/Response Bodies. Definiert das API-Schema.
VERSION: 0.6.7 (WP-Fix: Target Section Support)
VERSION: 0.7.0 (WP-25: Multi-Stream & Agentic RAG Support)
STATUS: Active
DEPENDENCIES: pydantic, typing, uuid
LAST_ANALYSIS: 2025-12-29
"""
from __future__ import annotations
@ -12,8 +11,14 @@ from pydantic import BaseModel, Field
from typing import List, Literal, Optional, Dict, Any
import uuid
# Gültige Kanten-Typen gemäß Manual
EdgeKind = Literal["references", "references_at", "backlink", "next", "prev", "belongs_to", "depends_on", "related_to", "similar_to", "caused_by", "derived_from", "based_on", "solves", "blocks", "uses", "guides"]
# WP-25: Erweiterte Kanten-Typen gemäß neuer decision_engine.yaml
EdgeKind = Literal[
"references", "references_at", "backlink", "next", "prev",
"belongs_to", "depends_on", "related_to", "similar_to",
"caused_by", "derived_from", "based_on", "solves", "blocks",
"uses", "guides", "enforced_by", "implemented_in", "part_of",
"experienced_in", "impacts", "risk_of"
]
# --- Basis-DTOs ---
@ -43,14 +48,14 @@ class EdgeDTO(BaseModel):
direction: Literal["out", "in", "undirected"] = "out"
provenance: Optional[Literal["explicit", "rule", "smart", "structure"]] = "explicit"
confidence: float = 1.0
target_section: Optional[str] = None # Neu: Speichert den Anker (z.B. #Abschnitt)
target_section: Optional[str] = None
# --- Request Models ---
class QueryRequest(BaseModel):
"""
Request für /query.
Request für /query. Unterstützt Multi-Stream Isolation via filters.
"""
mode: Literal["semantic", "edge", "hybrid"] = "hybrid"
query: Optional[str] = None
@ -61,14 +66,12 @@ class QueryRequest(BaseModel):
ret: Dict = {"with_paths": True, "with_notes": True, "with_chunks": True}
explain: bool = False
# WP-22: Semantic Graph Routing
# WP-22/25: Dynamische Gewichtung der Graphen-Highways
boost_edges: Optional[Dict[str, float]] = None
class FeedbackRequest(BaseModel):
"""
User-Feedback zu einem spezifischen Treffer oder der Gesamtantwort (WP-08 Basis).
"""
"""User-Feedback zu einem spezifischen Treffer oder der Gesamtantwort."""
query_id: str = Field(..., description="ID der ursprünglichen Suche")
node_id: str = Field(..., description="ID des bewerteten Treffers oder 'generated_answer'")
score: int = Field(..., ge=1, le=5, description="1 (Irrelevant) bis 5 (Perfekt)")
@ -76,16 +79,14 @@ class FeedbackRequest(BaseModel):
class ChatRequest(BaseModel):
    """Request body for /chat (WP-25 entry point)."""
    message: str = Field(..., description="Die Nachricht des Users")
    conversation_id: Optional[str] = Field(None, description="ID für Chat-Verlauf")
    # Maximum number of retrieval hits requested by the caller.
    top_k: int = 5
    # When True, hits carry score explanations (see Explanation models).
    explain: bool = False
# --- WP-04b Explanation Models ---
# --- Explanation Models ---
class ScoreBreakdown(BaseModel):
"""Aufschlüsselung der Score-Komponenten nach der WP-22 Formel."""
@ -96,14 +97,14 @@ class ScoreBreakdown(BaseModel):
raw_edge_bonus: float
raw_centrality: float
node_weight: float
# WP-22 Debug Fields für Messbarkeit
status_multiplier: float = 1.0
graph_boost_factor: float = 1.0
class Reason(BaseModel):
    """A single semantic reason contributing to a hit's ranking."""
    # WP-25: 'status' added to stay in sync with retriever.py.
    # FIX: the diff residue carried both the old and the new Literal for
    # 'kind'; consolidated to the WP-25 definition.
    kind: Literal["semantic", "edge", "type", "centrality", "lifecycle", "status"]
    message: str
    # Optional signed contribution of this reason to the total score.
    score_impact: Optional[float] = None
    details: Optional[Dict[str, Any]] = None
@ -114,7 +115,6 @@ class Explanation(BaseModel):
breakdown: ScoreBreakdown
reasons: List[Reason]
related_edges: Optional[List[EdgeDTO]] = None
# WP-22 Debug: Verifizierung des Routings
applied_intent: Optional[str] = None
applied_boosts: Optional[Dict[str, float]] = None
@ -122,7 +122,7 @@ class Explanation(BaseModel):
# --- Response Models ---
class QueryHit(BaseModel):
"""Einzelnes Trefferobjekt für /query."""
"""Einzelnes Trefferobjekt."""
node_id: str
note_id: str
semantic_score: float
@ -136,7 +136,7 @@ class QueryHit(BaseModel):
class QueryResponse(BaseModel):
"""Antwortstruktur für /query."""
"""Antwortstruktur für /query (wird von DecisionEngine Streams genutzt)."""
query_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
results: List[QueryHit]
used_mode: str
@ -153,11 +153,12 @@ class GraphResponse(BaseModel):
class ChatResponse(BaseModel):
    """
    Response body for /chat.

    WP-25: 'intent' now reflects the selected retrieval strategy.
    FIX: the diff residue defined 'intent' and 'intent_source' twice
    (pre- and post-WP-25 variants); only the WP-25 definitions remain.
    """
    query_id: str = Field(..., description="Traceability ID")
    answer: str = Field(..., description="Generierte Antwort vom LLM")
    sources: List[QueryHit] = Field(..., description="Die genutzten Quellen (alle Streams)")
    latency_ms: int
    intent: Optional[str] = Field("FACT", description="Die gewählte WP-25 Strategie")
    intent_source: Optional[str] = Field("LLM_Router", description="Quelle der Intent-Erkennung")

View File

@ -1,12 +1,15 @@
"""
FILE: app/routers/chat.py
DESCRIPTION: Haupt-Chat-Interface (RAG & Interview). Enthält Intent-Router (Keywords/LLM) und Prompt-Construction.
VERSION: 2.7.8 (Full Unabridged Stability Edition)
DESCRIPTION: Haupt-Chat-Interface (WP-25 Agentic Edition).
Kombiniert die spezialisierte Interview-Logik und Keyword-Erkennung
mit der neuen Multi-Stream Orchestrierung der DecisionEngine.
VERSION: 3.0.2
STATUS: Active
FIX:
1. Implementiert Context-Throttling für Ollama (MAX_OLLAMA_CHARS).
2. Deaktiviert LLM-Retries für den Chat (max_retries=0).
3. Behebt Double-Fallback-Schleifen und Silent Refusals.
- 100% Wiederherstellung der v2.7.8 Logik (Interview, Schema-Resolution, Keywords).
- Integration der DecisionEngine für paralleles RAG-Retrieval.
- Erhalt der Ollama Context-Throttling Parameter (WP-20).
- Beibehaltung der No-Retry Logik (max_retries=0) für Chat-Stabilität.
"""
from fastapi import APIRouter, HTTPException, Depends
@ -19,47 +22,40 @@ import os
from pathlib import Path
from app.config import get_settings
from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
from app.models.dto import ChatRequest, ChatResponse, QueryHit
from app.services.llm_service import LLMService
from app.core.retrieval.retriever import Retriever
from app.services.feedback_service import log_search
router = APIRouter()
logger = logging.getLogger(__name__)
# --- Level 1: Config loading & caching ---
# Module-level caches so the YAML configs are parsed at most once per process
# (filled lazily by get_full_config / get_types_config).
_DECISION_CONFIG_CACHE = None
_TYPES_CONFIG_CACHE = None
def _load_decision_config() -> Dict[str, Any]:
    """
    Load the strategy configuration (WP-25 compatible).

    Returns:
        The parsed YAML mapping, or ``{"strategies": {}}`` when the file
        is missing, empty or unreadable. Best-effort: never raises.
    """
    settings = get_settings()
    path = Path(settings.DECISION_CONFIG_PATH)
    # FIX: the diff residue interleaved the pre-WP-25 default_config body with
    # the new exists-guard body; consolidated to the new implementation.
    try:
        if path.exists():
            with open(path, "r", encoding="utf-8") as f:
                return yaml.safe_load(f) or {}
    except Exception as e:
        logger.error(f"Failed to load decision config: {e}")
    return {"strategies": {}}
def _load_types_config() -> Dict[str, Any]:
"""Lädt die types.yaml für Keyword-Erkennung."""
"""Lädt die types.yaml für die Typerkennung im Interview-Modus."""
path = os.getenv("MINDNET_TYPES_FILE", "config/types.yaml")
try:
with open(path, "r", encoding="utf-8") as f:
return yaml.safe_load(f) or {}
except Exception:
return {}
if os.path.exists(path):
with open(path, "r", encoding="utf-8") as f:
return yaml.safe_load(f) or {}
except Exception as e:
logger.error(f"Failed to load types config: {e}")
return {}
def get_full_config() -> Dict[str, Any]:
global _DECISION_CONFIG_CACHE
@ -76,21 +72,20 @@ def get_types_config() -> Dict[str, Any]:
def get_decision_strategy(intent: str) -> Dict[str, Any]:
    """
    Resolve the strategy configuration for *intent*.

    Falls back to the 'FACT_WHAT' strategy (or an empty dict) when the
    intent has no dedicated entry in decision_engine.yaml.
    """
    config = get_full_config()
    strategies = config.get("strategies", {})
    # FIX: the diff residue carried both the old ('FACT') and the new
    # ('FACT_WHAT') fallback returns; consolidated to the WP-25 key.
    return strategies.get(intent, strategies.get("FACT_WHAT", {}))
# --- Helper: Target Type Detection (WP-07) ---
# --- EBENE 2: SPEZIAL-LOGIK (INTERVIEW & DETECTION) ---
def _detect_target_type(message: str, configured_schemas: Dict[str, Any]) -> str:
"""
Versucht zu erraten, welchen Notiz-Typ der User erstellen will.
Nutzt Keywords aus types.yaml UND Mappings.
WP-07: Identifiziert den gewünschten Notiz-Typ (Keyword-basiert).
100% identisch mit v2.7.8 zur Sicherstellung des Interview-Workflows.
"""
message_lower = message.lower()
# 1. Check types.yaml detection_keywords (Priority!)
types_cfg = get_types_config()
types_def = types_cfg.get("types", {})
# 1. Check types.yaml detection_keywords
for type_name, type_data in types_def.items():
keywords = type_data.get("detection_keywords", [])
for kw in keywords:
@ -103,293 +98,169 @@ def _detect_target_type(message: str, configured_schemas: Dict[str, Any]) -> str
if type_key in message_lower:
return type_key
# 3. Synonym-Mapping (Legacy Fallback)
# 3. Synonym-Mapping (Legacy)
synonyms = {
"projekt": "project", "vorhaben": "project",
"entscheidung": "decision", "beschluss": "decision",
"ziel": "goal",
"erfahrung": "experience", "lektion": "experience",
"wert": "value",
"prinzip": "principle",
"notiz": "default", "idee": "default"
"projekt": "project", "entscheidung": "decision", "ziel": "goal",
"erfahrung": "experience", "wert": "value", "prinzip": "principle"
}
for term, schema_key in synonyms.items():
if term in message_lower:
return schema_key
return "default"
# --- Dependencies ---
def get_llm_service():
    """FastAPI dependency: construct an LLMService instance."""
    return LLMService()
def get_retriever():
    """FastAPI dependency: construct a Retriever instance."""
    return Retriever()
# --- Logic ---
def _build_enriched_context(hits: List[QueryHit]) -> str:
    """Render retrieval hits into a numbered, typed context block for the LLM prompt."""
    context_parts = []
    for i, hit in enumerate(hits, 1):
        source = hit.source or {}
        # The chunk text may live under different keys depending on the
        # ingest path; take the first non-empty one.
        content = (
            source.get("text") or source.get("content") or
            source.get("page_content") or source.get("chunk_text") or
            "[Kein Text]"
        )
        title = hit.note_id or "Unbekannt"
        payload = hit.payload or {}
        # Prefer the payload's type, then the source's; upper-cased for display.
        note_type = payload.get("type") or source.get("type", "unknown")
        note_type = str(note_type).upper()
        entry = (
            f"### QUELLE {i}: {title}\n"
            f"TYP: [{note_type}] (Score: {hit.total_score:.2f})\n"
            f"INHALT:\n{content}\n"
        )
        context_parts.append(entry)
    return "\n\n".join(context_parts)
def _is_question(query: str) -> bool:
"""Prüft, ob der Input wahrscheinlich eine Frage ist."""
"""Prüft, ob der Input eine Frage ist (W-Fragen Erkennung)."""
q = query.strip().lower()
if "?" in q: return True
# W-Fragen Indikatoren
starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du", "entspricht"]
if any(q.startswith(s + " ") for s in starters):
return True
return False
starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du"]
return any(q.startswith(s + " ") for s in starters)
async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
"""
Hybrid Router v5:
1. Decision Keywords (Strategie) -> Prio 1
2. Type Keywords (Interview Trigger) -> Prio 2
3. LLM (Fallback) -> Prio 3
WP-25 Hybrid Router:
Nutzt erst Keyword-Fast-Paths (Router) und delegiert dann an die DecisionEngine.
"""
config = get_full_config()
strategies = config.get("strategies", {})
settings = config.get("settings", {})
query_lower = query.lower()
# 1. FAST PATH A: Strategie Keywords
# 1. FAST PATH: Keyword Trigger
for intent_name, strategy in strategies.items():
if intent_name == "FACT": continue
keywords = strategy.get("trigger_keywords", [])
for k in keywords:
if k.lower() in query_lower:
return intent_name, "Keyword (Strategy)"
return intent_name, "Keyword (FastPath)"
# 2. FAST PATH B: Type Keywords -> INTERVIEW
if not _is_question(query_lower):
types_cfg = get_types_config()
types_def = types_cfg.get("types", {})
for type_name, type_data in types_def.items():
keywords = type_data.get("detection_keywords", [])
for kw in keywords:
for type_name, type_data in types_cfg.get("types", {}).items():
for kw in type_data.get("detection_keywords", []):
if kw.lower() in query_lower:
return "INTERVIEW", f"Keyword (Type: {type_name})"
return "INTERVIEW", "Keyword (Interview)"
# 3. SLOW PATH: LLM Router
if settings.get("llm_fallback_enabled", False):
router_prompt_template = llm.get_prompt("llm_router_prompt")
if router_prompt_template:
prompt = router_prompt_template.replace("{query}", query)
logger.info("Keywords failed (or Question detected). Asking LLM for Intent...")
try:
# FIX: Auch beim Routing keine Retries im Chat-Fluss
raw_response = await llm.generate_raw_response(prompt, priority="realtime", max_retries=0)
llm_output_upper = raw_response.upper()
if "INTERVIEW" in llm_output_upper or "CREATE" in llm_output_upper:
return "INTERVIEW", "LLM Router"
# 3. SLOW PATH: DecisionEngine LLM Router
intent = await llm.decision_engine._determine_strategy(query)
return intent, "DecisionEngine (LLM)"
for strat_key in strategies.keys():
if strat_key in llm_output_upper:
return strat_key, "LLM Router"
except Exception as e:
logger.error(f"Router LLM failed: {e}")
return "FACT", "Default (No Match)"
# --- EBENE 3: RETRIEVAL AGGREGATION ---
def _collect_all_hits(stream_responses: Dict[str, Any]) -> List[QueryHit]:
"""Sammelt und dedupliziert Treffer aus allen parallelen Streams."""
all_hits = []
seen_node_ids = set()
for _, response in stream_responses.items():
if hasattr(response, 'results'):
for hit in response.results:
if hit.node_id not in seen_node_ids:
all_hits.append(hit)
seen_node_ids.add(hit.node_id)
return sorted(all_hits, key=lambda h: h.total_score, reverse=True)
# --- EBENE 4: ENDPUNKT ---
def get_llm_service():
    """FastAPI dependency: construct an LLMService instance."""
    return LLMService()
@router.post("/", response_model=ChatResponse)
async def chat_endpoint(
request: ChatRequest,
llm: LLMService = Depends(get_llm_service),
retriever: Retriever = Depends(get_retriever)
llm: LLMService = Depends(get_llm_service)
):
start_time = time.time()
query_id = str(uuid.uuid4())
logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")
settings = get_settings()
logger.info(f"🚀 [WP-25] Chat request [{query_id}]: {request.message[:50]}...")
try:
# 1. Intent Detection
intent, intent_source = await _classify_intent(request.message, llm)
logger.info(f"[{query_id}] Final Intent: {intent} via {intent_source}")
logger.info(f"[{query_id}] Intent: {intent} via {intent_source}")
# Strategy Load
strategy = get_decision_strategy(intent)
prompt_key = strategy.get("prompt_template", "rag_template")
preferred_provider = strategy.get("preferred_provider")
engine = llm.decision_engine
sources_hits = []
final_prompt = ""
context_str = ""
answer_text = ""
# 2. INTERVIEW MODE (Kompatibilität zu v2.7.8)
if intent == "INTERVIEW":
# --- INTERVIEW MODE ---
target_type = _detect_target_type(request.message, strategy.get("schemas", {}))
types_cfg = get_types_config()
type_def = types_cfg.get("types", {}).get(target_type, {})
fields_list = type_def.get("schema", [])
if not fields_list:
configured_schemas = strategy.get("schemas", {})
fallback_schema = configured_schemas.get(target_type, configured_schemas.get("default"))
if isinstance(fallback_schema, dict):
fields_list = fallback_schema.get("fields", [])
else:
fields_list = fallback_schema or []
fallback = configured_schemas.get(target_type, configured_schemas.get("default", {}))
fields_list = fallback.get("fields", []) if isinstance(fallback, dict) else (fallback or [])
logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")
fields_str = "\n- " + "\n- ".join(fields_list)
template = llm.get_prompt(strategy.get("prompt_template", "interview_template"))
template = llm.get_prompt(prompt_key)
final_prompt = template.replace("{context_str}", "Dialogverlauf...") \
.replace("{query}", request.message) \
final_prompt = template.replace("{query}", request.message) \
.replace("{target_type}", target_type) \
.replace("{schema_fields}", fields_str) \
.replace("{schema_hint}", "")
sources_hits = []
.replace("{schema_fields}", fields_str)
else:
# --- RAG MODE (FACT, DECISION, EMPATHY, CODING) ---
inject_types = strategy.get("inject_types", [])
prepend_instr = strategy.get("prepend_instruction", "")
edge_boosts = strategy.get("edge_boosts", {})
query_req = QueryRequest(
query=request.message,
mode="hybrid",
top_k=request.top_k,
explain=request.explain,
boost_edges=edge_boosts
)
retrieve_result = await retriever.search(query_req)
hits = retrieve_result.results
if inject_types:
strategy_req = QueryRequest(
query=request.message,
mode="hybrid",
top_k=3,
filters={"type": inject_types},
explain=False,
boost_edges=edge_boosts
)
strategy_result = await retriever.search(strategy_req)
existing_ids = {h.node_id for h in hits}
for strat_hit in strategy_result.results:
if strat_hit.node_id not in existing_ids:
hits.append(strat_hit)
context_str = _build_enriched_context(hits) if hits else "Keine relevanten Notizen gefunden."
# --- STABILITY FIX: OLLAMA CONTEXT THROTTLE ---
# Begrenzt den Text, um den "decode: cannot decode batches" Fehler zu vermeiden.
# MAX_OLLAMA_CHARS = 10000
settings = get_settings() # Falls noch nicht im Scope vorhanden
max_chars = getattr(settings, "MAX_OLLAMA_CHARS", 10000)
if preferred_provider == "ollama" and len(context_str) > max_chars:
logger.warning(f"⚠️ [{query_id}] Context zu groß für Ollama ({len(context_str)} chars). Kürze auf {max_chars}.")
context_str = context_str[:max_chars] + "\n[...gekürzt zur Stabilität...]"
template = llm.get_prompt(prompt_key) or "{context_str}\n\n{query}"
if prepend_instr:
context_str = f"{prepend_instr}\n\n{context_str}"
final_prompt = template.replace("{context_str}", context_str).replace("{query}", request.message)
sources_hits = hits
# --- DEBUG SPOT 1: PROMPT CONSTRUCTION ---
logger.info(f"[{query_id}] PROMPT CONSTRUCTION COMPLETE. Length: {len(final_prompt)} chars.")
if not final_prompt.strip():
logger.error(f"[{query_id}] CRITICAL: Final prompt is empty before sending to LLM!")
# --- GENERATION WITH NO-RETRY & DEEP FALLBACK ---
system_prompt = llm.get_prompt("system_prompt")
# --- DEBUG SPOT 2: PRIMARY CALL ---
logger.info(f"[{query_id}] PRIMARY CALL: Sending request to provider '{preferred_provider}' (No Retries)...")
answer_text = ""
try:
# FIX: max_retries=0 verhindert Hänger durch Retry-Kaskaden im Chat
answer_text = await llm.generate_raw_response(
prompt=final_prompt,
system=system_prompt,
priority="realtime",
provider=preferred_provider,
max_retries=0
final_prompt, system=llm.get_prompt("system_prompt"),
priority="realtime", provider=strategy.get("preferred_provider"), max_retries=0
)
except Exception as e:
logger.error(f"🛑 [{query_id}] Primary Provider '{preferred_provider}' failed: {e}")
sources_hits = []
# DEEP FALLBACK: Wenn die Antwort leer ist (Silent Refusal) oder der Primary abgestürzt ist
if not answer_text.strip() and preferred_provider != "ollama":
# --- DEBUG SPOT 3: FALLBACK TRIGGER ---
logger.warning(f"🛑 [{query_id}] PRIMARY '{preferred_provider}' returned EMPTY or FAILED. Triggering Deep Fallback to Ollama...")
# 3. RAG MODE (WP-25 Multi-Stream)
else:
stream_keys = strategy.get("use_streams", [])
library = engine.config.get("streams_library", {})
try:
answer_text = await llm.generate_raw_response(
prompt=final_prompt,
system=system_prompt,
priority="realtime",
provider="ollama",
max_retries=0
)
except Exception as e:
logger.error(f"🛑 [{query_id}] Deep Fallback to Ollama also failed: {e}")
answer_text = "Entschuldigung, das System ist aktuell überlastet. Bitte versuche es in einem Moment erneut."
tasks = []
active_streams = []
for key in stream_keys:
stream_cfg = library.get(key)
if stream_cfg:
active_streams.append(key)
tasks.append(engine._run_single_stream(key, stream_cfg, request.message))
import asyncio
responses = await asyncio.gather(*tasks, return_exceptions=True)
raw_stream_map = {}
formatted_context_map = {}
max_chars = getattr(settings, "MAX_OLLAMA_CHARS", 10000)
provider = strategy.get("preferred_provider") or settings.MINDNET_LLM_PROVIDER
for name, res in zip(active_streams, responses):
if not isinstance(res, Exception):
raw_stream_map[name] = res
context_text = engine._format_stream_context(res)
# WP-20 Stability Fix: Throttling
if provider == "ollama" and len(context_text) > max_chars:
context_text = context_text[:max_chars] + "\n[...]"
formatted_context_map[name] = context_text
answer_text = await engine._generate_final_answer(
intent, strategy, request.message, formatted_context_map
)
sources_hits = _collect_all_hits(raw_stream_map)
duration_ms = int((time.time() - start_time) * 1000)
# Logging
try:
log_search(
query_id=query_id,
query_text=request.message,
results=sources_hits,
mode="interview" if intent == "INTERVIEW" else "chat_rag",
metadata={"intent": intent, "source": intent_source, "provider": preferred_provider}
query_id=query_id, query_text=request.message, results=sources_hits,
mode=f"wp25_{intent.lower()}", metadata={"strategy": intent, "source": intent_source}
)
except: pass
return ChatResponse(
query_id=query_id,
answer=answer_text,
sources=sources_hits,
latency_ms=duration_ms,
intent=intent,
intent_source=intent_source
query_id=query_id, answer=answer_text, sources=sources_hits,
latency_ms=duration_ms, intent=intent, intent_source=intent_source
)
except Exception as e:
logger.error(f"Error in chat endpoint: {e}", exc_info=True)
# Wir geben eine benutzerfreundliche Meldung zurück, statt nur den Error-Stack
raise HTTPException(status_code=500, detail="Das System konnte die Anfrage nicht verarbeiten.")
logger.error(f"❌ Chat Endpoint Failure: {e}", exc_info=True)
raise HTTPException(status_code=500, detail="Fehler bei der Verarbeitung.")

View File

@ -6,11 +6,13 @@ DESCRIPTION: Hybrid-Client für Ollama, Google GenAI (Gemini) und OpenRouter.
WP-20 Fix: Bulletproof Prompt-Auflösung für format() Aufrufe.
WP-22/JSON: Optionales JSON-Schema + strict (für OpenRouter structured outputs).
FIX: Intelligente Rate-Limit Erkennung (429 Handling), v1-API Sync & Timeouts.
VERSION: 3.3.9
WP-25: Integration der DecisionEngine für Agentic Multi-Stream RAG.
VERSION: 3.4.1
STATUS: Active
FIX:
- Importiert clean_llm_text von app.core.registry zur Vermeidung von Circular Imports.
- Wendet clean_llm_text auf Text-Antworten in generate_raw_response an.
- 100% Wiederherstellung der v3.3.9 Logik (Rate-Limits, Retries, Async-Threads).
- Integration des WP-25 DecisionEngine Bridges in generate_rag_response.
- WP-25 Empty-Response-Guard für Cloud-Provider.
"""
import httpx
import yaml
@ -29,7 +31,6 @@ from app.core.registry import clean_llm_text
logger = logging.getLogger(__name__)
class LLMService:
# GLOBALER SEMAPHOR für Hintergrund-Last Steuerung (WP-06)
_background_semaphore = None
@ -37,6 +38,9 @@ class LLMService:
def __init__(self):
self.settings = get_settings()
self.prompts = self._load_prompts()
# WP-25: Lazy Initialization der DecisionEngine zur Vermeidung von Circular Imports
self._decision_engine = None
# Initialisiere Semaphore einmalig auf Klassen-Ebene
if LLMService._background_semaphore is None:
@ -71,6 +75,14 @@ class LLMService:
)
logger.info("🛰️ LLMService: OpenRouter Integration active.")
@property
def decision_engine(self):
"""Lazy Initialization der Decision Engine (WP-25)."""
if self._decision_engine is None:
from app.core.retrieval.decision_engine import DecisionEngine
self._decision_engine = DecisionEngine()
return self._decision_engine
def _load_prompts(self) -> dict:
"""Lädt die Prompt-Konfiguration aus der YAML-Datei."""
path = Path(self.settings.PROMPTS_PATH)
@ -132,14 +144,18 @@ class LLMService:
max_retries, base_delay, model_override,
json_schema, json_schema_name, strict_json_schema
)
# WP-14 Fix: Bereinige Text-Antworten vor Rückgabe
return clean_llm_text(res) if not force_json else res
else:
res = await self._dispatch(
target_provider, prompt, system, force_json,
max_retries, base_delay, model_override,
json_schema, json_schema_name, strict_json_schema
)
# WP-25 Empty Response Fix: Wenn Cloud-Provider leer antworten, Fallback auf Ollama
if (not res or len(res.strip()) < 5) and target_provider != "ollama":
logger.warning(f"⚠️ [WP-25] Empty response from {target_provider}. Falling back to OLLAMA.")
res = await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
res = await self._dispatch(
target_provider, prompt, system, force_json,
max_retries, base_delay, model_override,
json_schema, json_schema_name, strict_json_schema
)
# WP-14 Fix: Bereinige Text-Antworten vor Rückgabe
return clean_llm_text(res) if not force_json else res
@ -295,21 +311,16 @@ class LLMService:
logger.warning(f"⚠️ Ollama attempt {attempt} failed. Retrying in {wait_time}s...")
await asyncio.sleep(wait_time)
async def generate_rag_response(self, query: str, context_str: str) -> str:
"""Vollständiges RAG Chat-Interface."""
provider = self.settings.MINDNET_LLM_PROVIDER
system_prompt = self.get_prompt("system_prompt", provider)
rag_template = self.get_prompt("rag_template", provider)
final_prompt = rag_template.format(context_str=context_str, query=query)
# RAG Aufrufe im Chat nutzen nun standardmäßig max_retries=2 (überschreibbar)
# Durch den Aufruf von generate_raw_response wird die Bereinigung automatisch angewendet.
return await self.generate_raw_response(
final_prompt,
system=system_prompt,
priority="realtime"
)
async def generate_rag_response(self, query: str, context_str: Optional[str] = None) -> str:
"""
WP-25 UPDATE: Der primäre Einstiegspunkt für den MindNet Chat.
Delegiert nun an die DecisionEngine für Agentic Multi-Stream RAG.
Falls context_str bereits vorhanden ist (Legacy), wird dieser ignoriert zugunsten
der präzisen Multi-Stream Orchestrierung.
"""
logger.info(f"🚀 [WP-25] Chat Query intercepted: {query[:50]}...")
# Die DecisionEngine übernimmt nun das gesamte Management (Routing, Retrieval, Synthesis)
return await self.decision_engine.ask(query)
async def close(self):
"""Schließt die HTTP-Verbindungen."""

View File

@ -1,145 +1,112 @@
# config/decision_engine.yaml
# Steuerung der Decision Engine (Intent Recognition & Graph Routing)
# VERSION: 2.6.1 (WP-20: Hybrid LLM & WP-22: Semantic Graph Routing)
# VERSION: 3.1.2 (WP-25: Multi-Stream Agentic RAG)
# STATUS: Active
# DoD: Keine Hardcoded Modelle, volle Integration der strategischen Boosts.
# DoD: Strikte Trennung von Logik und Instruktion. Prompt in prompts.yaml verschoben.
version: 2.6
version: 3.1
settings:
llm_fallback_enabled: true
# Strategie für den Router selbst (Welches Modell erkennt den Intent?)
# "auto" nutzt den in MINDNET_LLM_PROVIDER gesetzten Standard (z.B. openrouter).
router_provider: "auto"
router_provider: "auto"
# Der Prompt-Key für den Router in prompts.yaml
router_prompt_key: "intent_router_v1"
# Few-Shot Prompting für den LLM-Router
llm_router_prompt: |
Du bist der zentrale Intent-Klassifikator für Mindnet, einen digitalen Zwilling.
Analysiere die Nachricht und wähle die passende Strategie.
Antworte NUR mit dem Namen der Strategie.
STRATEGIEN:
- INTERVIEW: User will Wissen erfassen, Notizen anlegen oder Dinge festhalten.
- DECISION: Rat, Strategie, Abwägung von Werten, "Soll ich tun X?".
- EMPATHY: Gefühle, Reflexion der eigenen Verfassung, Frust, Freude.
- CODING: Code-Erstellung, Debugging, technische Dokumentation.
- FACT: Reine Wissensabfrage, Definitionen, Suchen von Informationen.
BEISPIELE:
User: "Wie funktioniert die Qdrant-Vektor-DB?" -> FACT
User: "Soll ich mein Startup jetzt verkaufen?" -> DECISION
User: "Notiere mir kurz meine Gedanken zum Meeting." -> INTERVIEW
User: "Ich fühle mich heute sehr erschöpft." -> EMPATHY
User: "Schreibe eine FastAPI-Route für den Ingest." -> CODING
NACHRICHT: "{query}"
STRATEGIE:
# --- EBENE 1: STREAM-LIBRARY (Bausteine) ---
streams_library:
values_stream:
name: "Identität & Ethik"
query_template: "Welche meiner Werte und Prinzipien betreffen: {query}"
filter_types: ["value", "principle", "belief"]
top_k: 5
edge_boosts:
guides: 3.0
enforced_by: 2.5
based_on: 2.0
strategies:
# 1. Fakten-Abfrage (Turbo-Modus via OpenRouter / Primary)
FACT:
description: "Reine Wissensabfrage."
preferred_provider: "openrouter"
trigger_keywords: []
inject_types: []
# WP-22: Definitionen & Hierarchien im Graphen bevorzugen
facts_stream:
name: "Operative Realität"
query_template: "Status, Ressourcen und Fakten zu: {query}"
filter_types: ["project", "decision", "resource", "task", "milestone"]
top_k: 5
edge_boosts:
part_of: 2.0
composed_of: 2.0
similar_to: 1.5
caused_by: 0.5
prompt_template: "rag_template"
prepend_instruction: null
depends_on: 1.5
implemented_in: 1.5
# 2. Entscheidungs-Frage (Power-Strategie via Gemini)
DECISION:
description: "Der User sucht Rat, Strategie oder Abwägung."
preferred_provider: "gemini"
trigger_keywords:
- "soll ich"
- "meinung"
- "besser"
- "empfehlung"
- "strategie"
- "entscheidung"
- "abwägung"
- "vergleich"
inject_types: ["value", "principle", "goal", "risk"]
# WP-22: Risiken und Konsequenzen im Graphen priorisieren
biography_stream:
name: "Persönliche Erfahrung"
query_template: "Welche Erlebnisse habe ich im Kontext von {query} gemacht?"
filter_types: ["experience", "journal"]
top_k: 3
edge_boosts:
related_to: 1.5
experienced_in: 2.0
risk_stream:
name: "Risiko-Radar"
query_template: "Gefahren, Hindernisse oder Risiken bei: {query}"
filter_types: ["risk", "obstacle"]
top_k: 3
edge_boosts:
blocks: 2.5
solves: 2.0
depends_on: 1.5
risk_of: 2.5
impacts: 2.0
prompt_template: "decision_template"
prepend_instruction: |
!!! ENTSCHEIDUNGS-MODUS (HYBRID AI) !!!
BITTE WÄGE FAKTEN GEGEN FOLGENDE WERTE, PRINZIPIEN UND ZIELE AB:
risk_of: 2.5
# 3. Empathie / "Ich"-Modus (Lokal & Privat via Ollama)
EMPATHY:
description: "Reaktion auf emotionale Zustände."
preferred_provider: "openrouter"
trigger_keywords:
- "ich fühle"
- "traurig"
- "glücklich"
- "gestresst"
- "angst"
- "nervt"
- "überfordert"
- "müde"
inject_types: ["experience", "belief", "profile"]
edge_boosts:
based_on: 2.0
related_to: 2.0
experienced_in: 2.5
blocks: 0.1
prompt_template: "empathy_template"
prepend_instruction: null
# 4. Coding / Technical (Gemini Power)
CODING:
description: "Technische Anfragen und Programmierung."
preferred_provider: "gemini"
trigger_keywords:
- "code"
- "python"
- "script"
- "funktion"
- "bug"
- "syntax"
- "json"
- "yaml"
- "bash"
inject_types: ["snippet", "reference", "source"]
# WP-22: Technische Abhängigkeiten priorisieren
tech_stream:
name: "Technische Referenz"
query_template: "Technische Dokumentation und Code-Beispiele für: {query}"
filter_types: ["snippet", "reference", "source"]
top_k: 5
edge_boosts:
uses: 2.5
depends_on: 2.0
implemented_in: 3.0
prompt_template: "technical_template"
prepend_instruction: null
# 5. Interview / Datenerfassung (Lokal)
# --- EBENE 2: STRATEGIEN (Orchestrierung) ---
strategies:
FACT_WHEN:
description: "Abfrage von Zeitpunkten und Historie."
preferred_provider: "openrouter"
use_streams:
- "facts_stream"
- "biography_stream"
prompt_template: "fact_synthesis_v1"
FACT_WHAT:
description: "Abfrage von Definitionen und Wissen."
preferred_provider: "openrouter"
use_streams:
- "facts_stream"
- "tech_stream"
prompt_template: "fact_synthesis_v1"
DECISION:
description: "Der User sucht Rat, Strategie oder Abwägung."
preferred_provider: "gemini"
use_streams:
- "values_stream"
- "facts_stream"
- "risk_stream"
prompt_template: "decision_synthesis_v1"
prepend_instruction: "!!! ENTSCHEIDUNGS-MODUS (AGENTIC MULTI-STREAM) !!!"
EMPATHY:
description: "Reaktion auf emotionale Zustände."
preferred_provider: "openrouter"
use_streams:
- "biography_stream"
- "values_stream"
prompt_template: "empathy_template"
CODING:
description: "Technische Anfragen und Programmierung."
preferred_provider: "gemini"
use_streams:
- "tech_stream"
- "facts_stream"
prompt_template: "technical_template"
INTERVIEW:
description: "Der User möchte Wissen erfassen."
preferred_provider: "openrouter"
trigger_keywords:
- "neue notiz"
- "etwas notieren"
- "festhalten"
- "erstellen"
- "dokumentieren"
- "anlegen"
- "interview"
- "erfassen"
- "idee speichern"
- "draft"
inject_types: []
edge_boosts: {}
prompt_template: "interview_template"
prepend_instruction: null
preferred_provider: "openrouter"
use_streams: []
prompt_template: "interview_template"

View File

@ -1,7 +1,6 @@
# config/prompts.yaml — Final V2.6.0 (WP-15b Candidate-Validation)
# WP-20: Optimierte Cloud-Templates zur Unterdrückung von Modell-Geschwätz.
# FIX: Explizite Verbote für Einleitungstexte zur Vermeidung von JSON-Parsing-Fehlern.
# WP-15b: Integration der binären edge_validation für den Two-Pass Workflow.
# config/prompts.yaml — VERSION 3.0.0 (WP-25: Multi-Stream Agentic RAG)
# WP-20/22: Cloud-Templates & Semantic Graph Routing erhalten.
# WP-25: Integration der Multi-Stream Synthese zur Vermeidung von Halluzinationen.
# OLLAMA: UNVERÄNDERT laut Benutzeranweisung.
system_prompt: |
@ -270,4 +269,88 @@ edge_validation:
QUELLE: {chunk_text}
ZIEL: {target_title} ({target_summary})
BEZIEHUNG: {edge_kind}
Ist diese Verbindung valide? Antworte NUR mit YES oder NO.
Ist diese Verbindung valide? Antworte NUR mit YES oder NO.
# ---------------------------------------------------------
# 9. WP-25: MULTI-STREAM SYNTHESIS (Intent: SYNTHESIS)
# ---------------------------------------------------------
# Diese Templates verarbeiten die Ergebnisse aus parallelen Such-Streams.
decision_synthesis_v1:
gemini: |
Agiere als mein strategischer Partner. Analysiere die Frage: {query}
Hier sind die Ergebnisse aus verschiedenen Wissens-Streams meiner Mindnet-Basis:
### STREAM: WERTE & PRINZIPIEN (Identität)
{values_stream}
### STREAM: OPERATIVE FAKTEN (Realität)
{facts_stream}
### STREAM: RISIKO-ANALYSE (Konsequenzen)
{risk_stream}
AUFGABE:
1. Fasse die Faktenlage kurz zusammen.
2. Wäge die Fakten hart gegen meine Werte ab. Gibt es Konflikte?
3. Beurteile das Vorhaben basierend auf dem Risiko-Radar.
4. Gib eine klare strategische Empfehlung ab.
openrouter: |
Strategische Multi-Stream Analyse für: {query}
Werte-Basis: {values_stream}
Fakten: {facts_stream}
Risiken: {risk_stream}
Bitte wäge ab und gib eine Empfehlung.
ollama: |
Du bist mein Entscheidungs-Partner. Analysiere {query} basierend auf diesen Streams:
WERTE: {values_stream}
FAKTEN: {facts_stream}
RISIKEN: {risk_stream}
Wäge die Fakten gegen die Werte ab und nenne potenzielle Risiken. Nenne Quellen!
fact_synthesis_v1:
gemini: |
Beantworte die Wissensabfrage "{query}" basierend auf diesen Streams:
FAKTEN: {facts_stream}
BIOGRAFIE/ERFAHRUNG: {biography_stream}
TECHNIK: {tech_stream}
Kombiniere harte Fakten mit persönlichen Erfahrungen, falls vorhanden.
openrouter: |
Synthese der Wissens-Streams für: {query}
Inhalt: {facts_stream} | {biography_stream} | {tech_stream}
ollama: |
Fasse das Wissen zu {query} zusammen.
QUELLE FAKTEN: {facts_stream}
QUELLE ERFAHRUNG: {biography_stream}
QUELLE TECHNIK: {tech_stream}
Antworte präzise und nenne die Quellen.
# ... (Vorherige Sektionen 1-9 bleiben identisch)
# ---------------------------------------------------------
# 10. WP-25: INTENT ROUTING (Intent: CLASSIFY)
# ---------------------------------------------------------
intent_router_v1:
ollama: |
Analysiere die Nutzeranfrage und wähle die passende Strategie.
Antworte NUR mit dem Namen der Strategie.
STRATEGIEN:
- FACT_WHEN: Fragen nach "Wann", Daten, Historie.
- FACT_WHAT: Fragen nach "Was", Definitionen, Wissen.
- DECISION: Rat, Meinung, "Soll ich?", Abwägung.
- EMPATHY: Emotionen, Reflexion, "Ich fühle mich...".
- CODING: Programmierung, Skripte, Debugging.
- INTERVIEW: Dokumentation von Gedanken, Notizen erstellen.
NACHRICHT: "{query}"
STRATEGIE:
gemini: |
Classify query intent for Mindnet. Options: [FACT_WHEN, FACT_WHAT, DECISION, EMPATHY, CODING, INTERVIEW].
Query: "{query}"
Result (One word only):
openrouter: |
Select the best Mindnet strategy for: "{query}".
Strategies: FACT_WHEN, FACT_WHAT, DECISION, EMPATHY, CODING, INTERVIEW.
Response:

View File

@ -1,4 +1,4 @@
# Release Notes: Mindnet v2.9.1 (WP15c)
# Release Notes: Mindnet v2.9.3 (WP15c)
**Release Date:** 2025-12-31
**Type:** Feature Release - Multigraph & Diversity Engine