Update Decision Engine and related components for WP-25a: Bump version to 1.2.0, enhance multi-stream retrieval with pre-synthesis compression, and integrate Mixture of Experts (MoE) profile support. Refactor chat interface to utilize new compression logic and llm_profiles for improved synthesis. Maintain compatibility with existing methods and ensure robust error handling across services.

Lars 2026-01-02 07:04:43 +01:00
parent 3d2f3d12d9
commit d0eae8e43c
5 changed files with 299 additions and 200 deletions
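For context, a minimal usage sketch of the profile-driven call path this commit introduces (illustrative only, not part of the diff; it assumes LLMService can be constructed standalone with its config files in place, and uses the new profile_name parameter shown below):

import asyncio

from app.services.llm_service import LLMService

async def demo() -> None:
    llm = LLMService()  # loads prompts.yaml and the new llm_profiles.yaml
    # WP-25a MoE call: the profile resolves provider, model and temperature
    answer = await llm.generate_raw_response(
        "Welche Projekte laufen aktuell?",
        system=llm.get_prompt("system_prompt"),
        profile_name="synthesis_pro",
        priority="realtime",
    )
    print(answer)

asyncio.run(demo())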

--- a/app/core/retrieval/decision_engine.py
+++ b/app/core/retrieval/decision_engine.py

@@ -1,13 +1,15 @@
 """
 FILE: app/core/retrieval/decision_engine.py
-DESCRIPTION: Der Agentic Orchestrator für WP-25.
+DESCRIPTION: Der Agentic Orchestrator für MindNet (WP-25a Edition).
 Realisiert Multi-Stream Retrieval, Intent-basiertes Routing
-und parallele Wissens-Synthese.
-VERSION: 1.0.3
+und die neue Pre-Synthesis Kompression (Module A).
+VERSION: 1.2.0 (WP-25a: Mixture of Experts Support)
 STATUS: Active
 FIX:
-- WP-25 STREAM-TRACING: Kennzeichnung der Treffer mit ihrem Ursprungs-Stream.
-- WP-25 ROBUSTNESS: Pre-Initialization der Stream-Variablen zur Vermeidung von KeyErrors.
+- WP-25a: Vollständige Integration der llm_profile-Steuerung für Synthese und Kompression.
+- WP-25a: Implementierung der _compress_stream_content Logik zur Inhaltsverdichtung.
+- WP-25: Beibehaltung von Stream-Tracing und Pre-Initialization Robustness.
+- COMPATIBILITY: Erhalt aller Methoden-Signaturen für den System-Merge.
 """
 import asyncio
 import logging
@@ -32,7 +34,7 @@ class DecisionEngine:
         self.config = self._load_engine_config()

     def _load_engine_config(self) -> Dict[str, Any]:
-        """Lädt die Multi-Stream Konfiguration (WP-25)."""
+        """Lädt die Multi-Stream Konfiguration (WP-25/25a)."""
         path = os.getenv("MINDNET_DECISION_CONFIG", "config/decision_engine.yaml")
         if not os.path.exists(path):
             logger.error(f"❌ Decision Engine Config not found at {path}")
@@ -47,9 +49,9 @@
     async def ask(self, query: str) -> str:
         """
         Hauptmethode des MindNet Chats.
-        Orchestriert den gesamten Prozess: Routing -> Retrieval -> Synthese.
+        Orchestriert den agentischen Prozess: Routing -> Retrieval -> Kompression -> Synthese.
         """
-        # 1. Intent Recognition
+        # 1. Intent Recognition (Strategy Routing)
         strategy_key = await self._determine_strategy(query)
         strategies = self.config.get("strategies", {})
@@ -67,10 +69,11 @@
         if not strategy:
             return "Entschuldigung, meine Wissensbasis ist aktuell nicht konfiguriert."

-        # 2. Multi-Stream Retrieval
+        # 2. Multi-Stream Retrieval & Pre-Synthesis (Parallel Tasks)
+        # WP-25a: Diese Methode übernimmt nun auch die Kompression.
         stream_results = await self._execute_parallel_streams(strategy, query)

-        # 3. Synthese
+        # 3. Finale Synthese
         return await self._generate_final_answer(strategy_key, strategy, query, stream_results)

     async def _determine_strategy(self, query: str) -> str:
@@ -82,6 +85,7 @@
         full_prompt = router_prompt_template.format(query=query)

         try:
+            # Der Router nutzt den Standard-Provider (auto)
             response = await self.llm_service.generate_raw_response(
                 full_prompt, max_retries=1, priority="realtime"
             )
@@ -91,35 +95,86 @@
             return "FACT_WHAT"

     async def _execute_parallel_streams(self, strategy: Dict, query: str) -> Dict[str, str]:
-        """Führt Such-Streams gleichzeitig aus."""
+        """
+        Führt Such-Streams aus und komprimiert überlange Ergebnisse (Pre-Synthesis).
+        WP-25a: MoE-Profile werden für die Kompression berücksichtigt.
+        """
         stream_keys = strategy.get("use_streams", [])
         library = self.config.get("streams_library", {})

-        tasks = []
+        # Phase 1: Retrieval Tasks starten
+        retrieval_tasks = []
         active_streams = []
         for key in stream_keys:
             stream_cfg = library.get(key)
             if stream_cfg:
                 active_streams.append(key)
-                tasks.append(self._run_single_stream(key, stream_cfg, query))
+                retrieval_tasks.append(self._run_single_stream(key, stream_cfg, query))

-        results = await asyncio.gather(*tasks, return_exceptions=True)
+        # Ergebnisse sammeln (Exceptions werden als Objekte zurückgegeben)
+        retrieval_results = await asyncio.gather(*retrieval_tasks, return_exceptions=True)

-        mapped_results = {}
-        for name, res in zip(active_streams, results):
+        # Phase 2: Formatierung und optionale Kompression
+        final_stream_tasks = []
+        for name, res in zip(active_streams, retrieval_results):
             if isinstance(res, Exception):
-                logger.error(f"Stream '{name}' failed: {res}")
-                mapped_results[name] = "[Fehler beim Abruf dieses Wissens-Streams]"
-            else:
-                mapped_results[name] = self._format_stream_context(res)
-        return mapped_results
+                logger.error(f"Stream '{name}' failed during retrieval: {res}")
+                async def _err(): return "[Fehler beim Abruf dieses Wissens-Streams]"
+                final_stream_tasks.append(_err())
+                continue
+
+            # Formatierung der Hits in Text
+            formatted_context = self._format_stream_context(res)
+
+            # WP-25a: Kompressions-Check
+            stream_cfg = library.get(name, {})
+            threshold = stream_cfg.get("compression_threshold", 4000)
+
+            if len(formatted_context) > threshold:
+                logger.info(f"⚙️ [WP-25a] Compressing stream '{name}' ({len(formatted_context)} chars)...")
+                comp_profile = stream_cfg.get("compression_profile")
+                final_stream_tasks.append(
+                    self._compress_stream_content(name, formatted_context, query, comp_profile)
+                )
+            else:
+                # Direkt-Übernahme als Coroutine für gather()
+                async def _direct(c=formatted_context): return c
+                final_stream_tasks.append(_direct())
+
+        # Finale Inhalte (evtl. komprimiert) parallel fertigstellen
+        final_contents = await asyncio.gather(*final_stream_tasks)
+        return dict(zip(active_streams, final_contents))
+
+    async def _compress_stream_content(self, stream_name: str, content: str, query: str, profile: Optional[str]) -> str:
+        """
+        WP-25a Module A: Inhaltsverdichtung via Experten-Modell.
+        """
+        # Falls kein Profil definiert, nutzen wir das Default-Profil der Strategie
+        compression_prompt = (
+            f"Du bist ein Wissens-Analyst. Reduziere den folgenden Wissens-Stream '{stream_name}' "
+            f"auf die Informationen, die für die Beantwortung der Frage '{query}' absolut notwendig sind.\n\n"
+            f"BEIBEHALTEN: Harte Fakten, Projektnamen, konkrete Werte und Quellenangaben.\n"
+            f"ENTFERNEN: Redundante Einleitungen, Füllwörter und irrelevante Details.\n\n"
+            f"STREAM-INHALT:\n{content}\n\n"
+            f"KOMPRIMIERTE ANALYSE:"
+        )
+        try:
+            summary = await self.llm_service.generate_raw_response(
+                compression_prompt,
+                profile_name=profile,  # WP-25a: MoE Support
+                priority="background",
+                max_retries=1
+            )
+            return summary.strip() if (summary and len(summary.strip()) > 10) else content
+        except Exception as e:
+            logger.error(f"❌ Compression of {stream_name} failed: {e}")
+            return content

     async def _run_single_stream(self, name: str, cfg: Dict, query: str) -> QueryResponse:
-        """
-        Bereitet eine spezialisierte Suche vor.
-        WP-25: Taggt die Treffer mit ihrem Ursprungs-Stream.
-        """
+        """Spezialisierte Graph-Suche mit Stream-Tracing (WP-25)."""
         transformed_query = cfg.get("query_template", "{query}").format(query=query)

         request = QueryRequest(
@@ -131,18 +186,16 @@ class DecisionEngine:
             explain=True
         )

-        # Retrieval ausführen
         response = await self.retriever.search(request)

         # WP-25: STREAM-TRACING
-        # Markiere jeden Treffer mit dem Namen des Quell-Streams
        for hit in response.results:
             hit.stream_origin = name

         return response

     def _format_stream_context(self, response: QueryResponse) -> str:
-        """Wandelt QueryHits in Kontext-Strings um."""
+        """Wandelt QueryHits in einen formatierten Kontext-String um."""
         if not response.results:
             return "Keine spezifischen Informationen in diesem Stream gefunden."
@@ -161,12 +214,15 @@
         query: str,
         stream_results: Dict[str, str]
     ) -> str:
-        """Führt die Synthese durch."""
-        provider = strategy.get("preferred_provider") or self.settings.MINDNET_LLM_PROVIDER
+        """Führt die finale Synthese basierend auf dem Strategie-Profil durch."""
+        # WP-25a: Nutzt das llm_profile der Strategie
+        profile = strategy.get("llm_profile")
         template_key = strategy.get("prompt_template", "rag_template")
-        template = self.llm_service.get_prompt(template_key, provider=provider)
-        system_prompt = self.llm_service.get_prompt("system_prompt", provider=provider)
+        # Hier nutzen wir noch den Provider-String für get_prompt (Kompatibilität zu prompts.yaml)
+        # Der llm_service löst das Profil erst bei generate_raw_response auf.
+        template = self.llm_service.get_prompt(template_key)
+        system_prompt = self.llm_service.get_prompt("system_prompt")

         # WP-25 ROBUSTNESS: Pre-Initialization
         all_possible_streams = ["values_stream", "facts_stream", "biography_stream", "risk_stream", "tech_stream"]
@@ -181,10 +237,12 @@
         if prepend:
             final_prompt = f"{prepend}\n\n{final_prompt}"

+        # WP-25a: MoE Call
         response = await self.llm_service.generate_raw_response(
-            final_prompt, system=system_prompt, provider=provider, priority="realtime"
+            final_prompt, system=system_prompt, profile_name=profile, priority="realtime"
        )

+        # Fallback bei leerer Antwort auf lokales Modell
         if not response or len(response.strip()) < 5:
             return await self.llm_service.generate_raw_response(
                 final_prompt, system=system_prompt, provider="ollama", priority="realtime"

--- a/app/routers/chat.py
+++ b/app/routers/chat.py

@@ -1,14 +1,15 @@
 """
 FILE: app/routers/chat.py
-DESCRIPTION: Haupt-Chat-Interface (WP-25 Agentic Edition).
+DESCRIPTION: Haupt-Chat-Interface (WP-25a Agentic Edition).
 Kombiniert die spezialisierte Interview-Logik und Keyword-Erkennung
-mit der neuen Multi-Stream Orchestrierung der DecisionEngine.
-VERSION: 3.0.2
+mit der neuen MoE-Orchestrierung und Pre-Synthesis Kompression.
+VERSION: 3.0.3 (WP-25a: MoE & Compression Support - Full Release)
 STATUS: Active
 FIX:
-- 100% Wiederherstellung der v2.7.8 Logik (Interview, Schema-Resolution, Keywords).
-- Integration der DecisionEngine für paralleles RAG-Retrieval.
-- Erhalt der Ollama Context-Throttling Parameter (WP-20).
+- 100% Wiederherstellung der v3.0.2 Logik (Interview Fallbacks, Schema-Resolution).
+- WP-25a: Integration der Stream-Kompression (Module A) in den RAG-Workflow.
+- WP-25a: Unterstützung der llm_profiles für spezialisierte Synthese (Module B).
+- Erhalt der Ollama Context-Throttling Parameter (WP-20) als finaler Schutz.
 - Beibehaltung der No-Retry Logik (max_retries=0) für Chat-Stabilität.
 """
@@ -19,6 +20,7 @@ import uuid
 import logging
 import yaml
 import os
+import asyncio
 from pathlib import Path

 from app.config import get_settings
@@ -29,7 +31,7 @@ from app.services.feedback_service import log_search
 router = APIRouter()
 logger = logging.getLogger(__name__)

-# --- EBENE 1: CONFIG LOADER & CACHING (Restauriert aus v2.7.8) ---
+# --- EBENE 1: CONFIG LOADER & CACHING (Restauriert aus v3.0.2) ---
 _DECISION_CONFIG_CACHE = None
 _TYPES_CONFIG_CACHE = None
@@ -77,10 +79,7 @@ def get_decision_strategy(intent: str) -> Dict[str, Any]:
 # --- EBENE 2: SPEZIAL-LOGIK (INTERVIEW & DETECTION) ---

 def _detect_target_type(message: str, configured_schemas: Dict[str, Any]) -> str:
-    """
-    WP-07: Identifiziert den gewünschten Notiz-Typ (Keyword-basiert).
-    100% identisch mit v2.7.8 zur Sicherstellung des Interview-Workflows.
-    """
+    """WP-07: Identifiziert den gewünschten Notiz-Typ (Keyword-basiert)."""
     message_lower = message.lower()
     types_cfg = get_types_config()
     types_def = types_cfg.get("types", {})
@@ -117,10 +116,7 @@ def _is_question(query: str) -> bool:
     return any(q.startswith(s + " ") for s in starters)

 async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
-    """
-    WP-25 Hybrid Router:
-    Nutzt erst Keyword-Fast-Paths (Router) und delegiert dann an die DecisionEngine.
-    """
+    """Hybrid Router: Keyword-Fast-Paths & DecisionEngine LLM Router."""
     config = get_full_config()
     strategies = config.get("strategies", {})
     query_lower = query.lower()
@@ -171,7 +167,7 @@ async def chat_endpoint(
     start_time = time.time()
     query_id = str(uuid.uuid4())
     settings = get_settings()
-    logger.info(f"🚀 [WP-25] Chat request [{query_id}]: {request.message[:50]}...")
+    logger.info(f"🚀 [WP-25a] Chat request [{query_id}]: {request.message[:50]}...")

     try:
         # 1. Intent Detection
@@ -184,13 +180,14 @@
         sources_hits = []
         answer_text = ""

-        # 2. INTERVIEW MODE (Kompatibilität zu v2.7.8)
+        # 2. INTERVIEW MODE (Kompatibilität zu v3.0.2)
         if intent == "INTERVIEW":
             target_type = _detect_target_type(request.message, strategy.get("schemas", {}))
             types_cfg = get_types_config()
             type_def = types_cfg.get("types", {}).get(target_type, {})
             fields_list = type_def.get("schema", [])

+            # WP-07: RESTAURIERTE FALLBACK LOGIK (v3.0.2)
             if not fields_list:
                 configured_schemas = strategy.get("schemas", {})
                 fallback = configured_schemas.get(target_type, configured_schemas.get("default", {}))
@@ -203,17 +200,19 @@
                 .replace("{target_type}", target_type) \
                 .replace("{schema_fields}", fields_str)

+            # WP-25a: Nutzt spezialisiertes Kompressions-Profil für Interviews
             answer_text = await llm.generate_raw_response(
                 final_prompt, system=llm.get_prompt("system_prompt"),
-                priority="realtime", provider=strategy.get("preferred_provider"), max_retries=0
+                priority="realtime", profile_name="compression_fast", max_retries=0
             )
             sources_hits = []

-        # 3. RAG MODE (WP-25 Multi-Stream)
+        # 3. RAG MODE (WP-25a Multi-Stream + Pre-Synthesis)
         else:
             stream_keys = strategy.get("use_streams", [])
             library = engine.config.get("streams_library", {})

+            # Phase A: Retrieval
             tasks = []
             active_streams = []
             for key in stream_keys:
@@ -222,25 +221,44 @@
                     active_streams.append(key)
                     tasks.append(engine._run_single_stream(key, stream_cfg, request.message))

-            import asyncio
             responses = await asyncio.gather(*tasks, return_exceptions=True)

             raw_stream_map = {}
-            formatted_context_map = {}
+            formatted_context_tasks = []
             max_chars = getattr(settings, "MAX_OLLAMA_CHARS", 10000)
             provider = strategy.get("preferred_provider") or settings.MINDNET_LLM_PROVIDER

+            # Phase B: Pre-Synthesis & Throttling
             for name, res in zip(active_streams, responses):
                 if not isinstance(res, Exception):
                     raw_stream_map[name] = res
                     context_text = engine._format_stream_context(res)

-                    # WP-20 Stability Fix: Throttling
-                    if provider == "ollama" and len(context_text) > max_chars:
-                        context_text = context_text[:max_chars] + "\n[...]"
-                    formatted_context_map[name] = context_text
+                    # WP-25a: Automatisierte Kompression
+                    stream_cfg = library.get(name, {})
+                    threshold = stream_cfg.get("compression_threshold", 4000)
+
+                    if len(context_text) > threshold:
+                        profile = stream_cfg.get("compression_profile")
+                        formatted_context_tasks.append(
+                            engine._compress_stream_content(name, context_text, request.message, profile)
+                        )
+                    else:
+                        # WP-20: Restaurierter Throttling-Schutz als Fallback
+                        if provider == "ollama" and len(context_text) > max_chars:
+                            context_text = context_text[:max_chars] + "\n[...]"
+                        async def _ident(c=context_text): return c
+                        formatted_context_tasks.append(_ident())
+                else:
+                    async def _err(): return "[Stream Error]"
+                    formatted_context_tasks.append(_err())
+
+            # Inhalte parallel finalisieren
+            final_contexts = await asyncio.gather(*formatted_context_tasks)
+            formatted_context_map = dict(zip(active_streams, final_contexts))

+            # Phase C: MoE Synthese
             answer_text = await engine._generate_final_answer(
                 intent, strategy, request.message, formatted_context_map
             )
@@ -252,7 +270,7 @@
         try:
             log_search(
                 query_id=query_id, query_text=request.message, results=sources_hits,
-                mode=f"wp25_{intent.lower()}", metadata={"strategy": intent, "source": intent_source}
+                mode=f"wp25a_{intent.lower()}", metadata={"strategy": intent, "source": intent_source}
             )
         except: pass

--- a/app/services/llm_service.py
+++ b/app/services/llm_service.py

@@ -1,16 +1,14 @@
 """
 FILE: app/services/llm_service.py
 DESCRIPTION: Hybrid-Client für Ollama, Google GenAI (Gemini) und OpenRouter.
-Verwaltet provider-spezifische Prompts und Background-Last.
-WP-20: Optimiertes Fallback-Management zum Schutz von Cloud-Quoten.
-WP-22/JSON: Optionales JSON-Schema + strict (für OpenRouter).
-WP-25: Integration der DecisionEngine für Agentic Multi-Stream RAG.
-VERSION: 3.4.2 (WP-25: Ingest-Stability Patch)
+WP-25a: Implementierung der Mixture of Experts (MoE) Profil-Steuerung.
+VERSION: 3.5.0 (WP-25a: MoE & Profile Orchestration)
 STATUS: Active
 FIX:
-- Ingest-Stability: Entfernung des <5-Zeichen Guards (ermöglicht YES/NO Validierungen).
-- OpenRouter-Fix: Sicherung gegen leere 'choices' zur Vermeidung von JSON-Errors.
-- Erhalt der vollständigen v3.3.9 Logik für Rate-Limits, Retries und Background-Tasks.
+- WP-25a: Profilbasiertes Routing via llm_profiles.yaml.
+- WP-25a: Unterstützung individueller Temperaturen pro Experten-Profil.
+- WP-25: Beibehaltung der Ingest-Stability (kein Schwellenwert für YES/NO).
+- WP-25: Erhalt der vollständigen v3.4.2 Resilienz-Logik.
 """
 import httpx
 import yaml
@@ -19,28 +17,28 @@ import asyncio
 import json

 from google import genai
 from google.genai import types
-from openai import AsyncOpenAI  # Für OpenRouter (OpenAI-kompatibel)
+from openai import AsyncOpenAI
 from pathlib import Path
 from typing import Optional, Dict, Any, Literal

 from app.config import get_settings
-# ENTSCHEIDENDER FIX: Import der neutralen Bereinigungs-Logik (WP-14)
+# Import der neutralen Bereinigungs-Logik
 from app.core.registry import clean_llm_text

 logger = logging.getLogger(__name__)

 class LLMService:
-    # GLOBALER SEMAPHOR für Hintergrund-Last Steuerung (WP-06)
     _background_semaphore = None

     def __init__(self):
         self.settings = get_settings()
         self.prompts = self._load_prompts()
-        # WP-25: Lazy Initialization der DecisionEngine zur Vermeidung von Circular Imports
+        # WP-25a: Zentrale Experten-Profile laden
+        self.profiles = self._load_llm_profiles()
         self._decision_engine = None

-        # Initialisiere Semaphore einmalig auf Klassen-Ebene
         if LLMService._background_semaphore is None:
             limit = getattr(self.settings, "BACKGROUND_LIMIT", 2)
             logger.info(f"🚦 LLMService: Initializing Background Semaphore with limit: {limit}")
@@ -52,10 +50,9 @@ class LLMService:
             timeout=httpx.Timeout(self.settings.LLM_TIMEOUT)
         )

-        # 2. Google GenAI Client (Modern SDK)
+        # 2. Google GenAI Client
         self.google_client = None
         if self.settings.GOOGLE_API_KEY:
-            # FIX: Wir erzwingen api_version 'v1' für höhere Stabilität bei 2.5er Modellen.
             self.google_client = genai.Client(
                 api_key=self.settings.GOOGLE_API_KEY,
                 http_options={'api_version': 'v1'}
@@ -68,24 +65,20 @@
             self.openrouter_client = AsyncOpenAI(
                 base_url="https://openrouter.ai/api/v1",
                 api_key=self.settings.OPENROUTER_API_KEY,
-                # Strikter Timeout für OpenRouter Free-Tier zur Vermeidung von Hangs.
                 timeout=45.0
             )
             logger.info("🛰️ LLMService: OpenRouter Integration active.")

     @property
     def decision_engine(self):
-        """Lazy Initialization der Decision Engine (WP-25)."""
         if self._decision_engine is None:
             from app.core.retrieval.decision_engine import DecisionEngine
             self._decision_engine = DecisionEngine()
         return self._decision_engine

     def _load_prompts(self) -> dict:
-        """Lädt die Prompt-Konfiguration aus der YAML-Datei."""
         path = Path(self.settings.PROMPTS_PATH)
         if not path.exists():
-            logger.error(f"❌ Prompts file not found at {path}")
             return {}
         try:
             with open(path, "r", encoding="utf-8") as f:
@@ -94,21 +87,28 @@ class LLMService:
             logger.error(f"❌ Failed to load prompts: {e}")
             return {}

+    def _load_llm_profiles(self) -> dict:
+        """WP-25a: Lädt die zentralen MoE-Profile aus der llm_profiles.yaml."""
+        # Wir nutzen den in settings oder decision_engine definierten Pfad
+        path_str = getattr(self.settings, "LLM_PROFILES_PATH", "config/llm_profiles.yaml")
+        path = Path(path_str)
+        if not path.exists():
+            logger.warning(f"⚠️ LLM Profiles file not found at {path}. System will use .env defaults.")
+            return {}
+        try:
+            with open(path, "r", encoding="utf-8") as f:
+                data = yaml.safe_load(f) or {}
+                return data.get("profiles", {})
+        except Exception as e:
+            logger.error(f"❌ Failed to load llm_profiles.yaml: {e}")
+            return {}
+
     def get_prompt(self, key: str, provider: str = None) -> str:
-        """
-        Hole provider-spezifisches Template mit intelligenter Text-Kaskade.
-        Kaskade: Gewählter Provider -> Gemini -> Ollama.
-        """
         active_provider = provider or self.settings.MINDNET_LLM_PROVIDER
         data = self.prompts.get(key, "")

         if isinstance(data, dict):
             val = data.get(active_provider, data.get("gemini", data.get("ollama", "")))
-            if isinstance(val, dict):
-                logger.warning(f"⚠️ [LLMService] Nested dictionary detected for key '{key}'. Using first entry.")
-                val = next(iter(val.values()), "") if val else ""
             return str(val)
         return str(data)

     async def generate_raw_response(
@@ -123,34 +123,48 @@
         model_override: Optional[str] = None,
         json_schema: Optional[Dict[str, Any]] = None,
         json_schema_name: str = "mindnet_json",
-        strict_json_schema: bool = True
+        strict_json_schema: bool = True,
+        profile_name: Optional[str] = None  # WP-25a
     ) -> str:
         """
-        Haupteinstiegspunkt für LLM-Anfragen.
-        WP-25 FIX: Schwellenwert entfernt, um kurze Ingest-Validierungen (YES/NO) zu unterstützen.
+        Haupteinstiegspunkt für LLM-Anfragen mit Profil-Unterstützung.
         """
-        target_provider = provider or self.settings.MINDNET_LLM_PROVIDER
+        target_provider = provider
+        target_model = model_override
+        target_temp = None
+
+        # WP-25a: Profil-Auflösung (Provider, Modell, Temperatur)
+        if profile_name and self.profiles:
+            profile = self.profiles.get(profile_name)
+            if profile:
+                target_provider = profile.get("provider", target_provider)
+                target_model = profile.get("model", target_model)
+                target_temp = profile.get("temperature")
+                logger.debug(f"🎭 MoE Call: Profil '{profile_name}' -> {target_provider}")
+
+        # Fallback auf Standard-Provider falls nichts übergeben/definiert wurde
+        if not target_provider:
+            target_provider = self.settings.MINDNET_LLM_PROVIDER

         if priority == "background":
             async with LLMService._background_semaphore:
                 res = await self._dispatch(
                     target_provider, prompt, system, force_json,
-                    max_retries, base_delay, model_override,
-                    json_schema, json_schema_name, strict_json_schema
+                    max_retries, base_delay, target_model,
+                    json_schema, json_schema_name, strict_json_schema, target_temp
                 )
         else:
             res = await self._dispatch(
                 target_provider, prompt, system, force_json,
-                max_retries, base_delay, model_override,
-                json_schema, json_schema_name, strict_json_schema
+                max_retries, base_delay, target_model,
+                json_schema, json_schema_name, strict_json_schema, target_temp
             )

-        # WP-25 FIX: Nur noch auf absolut leere Antwort prüfen (ermöglicht YES/NO Antworten).
+        # WP-25 Fix: Ingest-Stability (Ermöglicht YES/NO ohne Schwellenwert-Blockade)
         if not res and target_provider != "ollama":
-            logger.warning(f"⚠️ [WP-25] Empty response from {target_provider}. Falling back to OLLAMA.")
-            res = await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
+            logger.warning(f"⚠️ [WP-25] Empty response from {target_provider}. Fallback to OLLAMA.")
+            res = await self._execute_ollama(prompt, system, force_json, max_retries, base_delay, target_temp)

-        # WP-14 Fix: Bereinige Text-Antworten vor Rückgabe
         return clean_llm_text(res) if not force_json else res

     async def _dispatch(
@@ -164,9 +178,10 @@
         model_override: Optional[str],
         json_schema: Optional[Dict[str, Any]],
         json_schema_name: str,
-        strict_json_schema: bool
+        strict_json_schema: bool,
+        temperature: Optional[float] = None  # WP-25a
     ) -> str:
-        """Routet die Anfrage mit intelligenter Rate-Limit Erkennung."""
+        """Routet die Anfrage mit Rate-Limit Erkennung."""
         rate_limit_attempts = 0
         max_rate_retries = min(max_retries, getattr(self.settings, "LLM_RATE_LIMIT_RETRIES", 3))
         wait_time = getattr(self.settings, "LLM_RATE_LIMIT_WAIT", 60.0)
@@ -175,43 +190,42 @@
             try:
                 if provider == "openrouter" and self.openrouter_client:
                     return await self._execute_openrouter(
-                        prompt=prompt,
-                        system=system,
-                        force_json=force_json,
-                        model_override=model_override,
-                        json_schema=json_schema,
-                        json_schema_name=json_schema_name,
-                        strict_json_schema=strict_json_schema
+                        prompt=prompt, system=system, force_json=force_json,
+                        model_override=model_override, json_schema=json_schema,
+                        json_schema_name=json_schema_name, strict_json_schema=strict_json_schema,
+                        temperature=temperature
                     )

                 if provider == "gemini" and self.google_client:
-                    return await self._execute_google(prompt, system, force_json, model_override)
+                    return await self._execute_google(prompt, system, force_json, model_override, temperature)

-                return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
+                return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay, temperature)

             except Exception as e:
                 err_str = str(e)
-                is_rate_limit = any(x in err_str for x in ["429", "RESOURCE_EXHAUSTED", "rate_limited", "Too Many Requests"])
-                if is_rate_limit and rate_limit_attempts < max_rate_retries:
+                if any(x in err_str for x in ["429", "RESOURCE_EXHAUSTED", "rate_limited"]):
                     rate_limit_attempts += 1
-                    logger.warning(f"⏳ Rate Limit from {provider}. Attempt {rate_limit_attempts}. Waiting {wait_time}s...")
+                    logger.warning(f"⏳ Rate Limit {provider}. Attempt {rate_limit_attempts}. Wait {wait_time}s.")
                     await asyncio.sleep(wait_time)
                     continue

                 if self.settings.LLM_FALLBACK_ENABLED and provider != "ollama":
-                    logger.warning(f"🔄 Provider {provider} failed ({err_str}). Falling back to OLLAMA.")
-                    return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
+                    return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay, temperature)

                 raise e

-    async def _execute_google(self, prompt, system, force_json, model_override):
+    async def _execute_google(self, prompt, system, force_json, model_override, temperature):
         model = model_override or self.settings.GEMINI_MODEL
         clean_model = model.replace("models/", "")

-        config = types.GenerateContentConfig(
-            system_instruction=system,
-            response_mime_type="application/json" if force_json else "text/plain"
-        )
+        config_kwargs = {
+            "system_instruction": system,
+            "response_mime_type": "application/json" if force_json else "text/plain"
+        }
+        if temperature is not None:
+            config_kwargs["temperature"] = temperature
+        config = types.GenerateContentConfig(**config_kwargs)

         response = await asyncio.wait_for(
             asyncio.to_thread(
                 self.google_client.models.generate_content,
@@ -222,53 +236,47 @@
         return response.text.strip()

     async def _execute_openrouter(
-        self,
-        prompt: str,
-        system: Optional[str],
-        force_json: bool,
-        model_override: Optional[str],
-        json_schema: Optional[Dict[str, Any]] = None,
-        json_schema_name: str = "mindnet_json",
-        strict_json_schema: bool = True
+        self, prompt: str, system: Optional[str], force_json: bool,
+        model_override: Optional[str], json_schema: Optional[Dict[str, Any]] = None,
+        json_schema_name: str = "mindnet_json", strict_json_schema: bool = True,
+        temperature: Optional[float] = None
     ) -> str:
-        """OpenRouter API Integration. WP-25 FIX: Sicherung gegen leere 'choices'."""
         model = model_override or self.settings.OPENROUTER_MODEL

         messages = []
-        if system:
-            messages.append({"role": "system", "content": system})
+        if system: messages.append({"role": "system", "content": system})
         messages.append({"role": "user", "content": prompt})

         kwargs: Dict[str, Any] = {}
+        if temperature is not None:
+            kwargs["temperature"] = temperature
+
         if force_json:
             if json_schema:
                 kwargs["response_format"] = {
                     "type": "json_schema",
-                    "json_schema": {
-                        "name": json_schema_name, "strict": strict_json_schema, "schema": json_schema
-                    }
+                    "json_schema": {"name": json_schema_name, "strict": strict_json_schema, "schema": json_schema}
                 }
             else:
                 kwargs["response_format"] = {"type": "json_object"}

         response = await self.openrouter_client.chat.completions.create(
-            model=model,
-            messages=messages,
-            **kwargs
+            model=model, messages=messages, **kwargs
         )

-        # WP-25 FIX: Sicherung gegen leere Antwort-Arrays
-        if not response.choices or len(response.choices) == 0:
-            logger.warning(f"🛰️ OpenRouter returned no choices for model {model}")
+        if not response.choices:
             return ""
         return response.choices[0].message.content.strip() if response.choices[0].message.content else ""

-    async def _execute_ollama(self, prompt, system, force_json, max_retries, base_delay):
+    async def _execute_ollama(self, prompt, system, force_json, max_retries, base_delay, temperature=None):
+        # WP-25a: Nutzt Profil-Temperatur oder Standard
+        effective_temp = temperature if temperature is not None else (0.1 if force_json else 0.7)
         payload = {
             "model": self.settings.LLM_MODEL,
             "prompt": prompt,
             "stream": False,
-            "options": {"temperature": 0.1 if force_json else 0.7, "num_ctx": 8192}
+            "options": {"temperature": effective_temp, "num_ctx": 8192}
         }
         if force_json: payload["format"] = "json"
         if system: payload["system"] = system
@@ -281,15 +289,11 @@
                 return res.json().get("response", "").strip()
             except Exception as e:
                 attempt += 1
-                if attempt > max_retries:
-                    logger.error(f"❌ Ollama request failed: {e}")
-                    raise e
-                wait_time = base_delay * (2 ** (attempt - 1))
-                await asyncio.sleep(wait_time)
+                if attempt > max_retries: raise e
+                await asyncio.sleep(base_delay * (2 ** (attempt - 1)))

     async def generate_rag_response(self, query: str, context_str: Optional[str] = None) -> str:
         """WP-25: Orchestrierung via DecisionEngine."""
-        logger.info(f"🚀 [WP-25] Chat Query: {query[:50]}...")
         return await self.decision_engine.ask(query)

     async def close(self):

--- a/config/decision_engine.yaml
+++ b/config/decision_engine.yaml

@@ -1,28 +1,32 @@
 # config/decision_engine.yaml
-# VERSION: 3.1.6 (WP-25: Multi-Stream Agentic RAG - Final Release)
+# VERSION: 3.2.2 (WP-25a: Decoupled MoE Logic)
 # STATUS: Active
-# DoD:
-# - Strikte Nutzung der Typen aus types.yaml (v2.7.0).
-# - Fix für Projekt-Klassifizierung via Keyword-Fast-Path (Auflösung Kollision).
-# - 100% Erhalt aller Stream-Parameter und Edge-Boosts.
+# DESCRIPTION: Zentrale Orchestrierung der Multi-Stream-Engine.
+# FIX:
+# - Auslagerung der LLM-Profile in llm_profiles.yaml zur zentralen Wartbarkeit.
+# - Integration von compression_thresholds zur Inhaltsverdichtung (WP-25a).
+# - 100% Erhalt aller WP-25 Edge-Boosts und Filter-Typen (v3.1.6).

-version: 3.1
+version: 3.2

 settings:
   llm_fallback_enabled: true
-  # "auto" nutzt den in MINDNET_LLM_PROVIDER gesetzten Standard.
+  # "auto" nutzt den globalen Default-Provider aus der .env
   router_provider: "auto"
-  # Verweist auf das Template in prompts.yaml
+  # Verweis auf den Intent-Klassifizierer in der prompts.yaml
   router_prompt_key: "intent_router_v1"
+  # Pfad zur neuen Experten-Konfiguration (WP-25a Architektur-Cleanliness)
+  profiles_config_path: "config/llm_profiles.yaml"

-# --- EBENE 1: STREAM-LIBRARY (Bausteine basierend auf types.yaml) ---
-# Synchronisiert mit types.yaml v2.7.0
+# --- EBENE 1: STREAM-LIBRARY (Bausteine basierend auf types.yaml v2.7.0) ---
 streams_library:
   values_stream:
     name: "Identität & Ethik"
+    # Referenz auf Experten-Profil (z.B. lokal via Ollama für Privacy)
+    llm_profile: "identity_safe"
+    compression_profile: "identity_safe"
+    compression_threshold: 2500
     query_template: "Welche meiner Werte und Prinzipien betreffen: {query}"
-    # Nur Typen aus types.yaml
     filter_types: ["value", "principle", "belief", "trait", "boundary", "need", "motivation"]
     top_k: 5
     edge_boosts:
@@ -32,8 +36,10 @@ streams_library:
   facts_stream:
     name: "Operative Realität"
+    llm_profile: "synthesis_pro"
+    compression_profile: "compression_fast"
+    compression_threshold: 3500
     query_template: "Status, Ressourcen und Fakten zu: {query}"
-    # Nur Typen aus types.yaml
     filter_types: ["project", "decision", "task", "goal", "event", "state"]
     top_k: 5
     edge_boosts:
@@ -43,8 +49,10 @@ streams_library:
   biography_stream:
     name: "Persönliche Erfahrung"
+    llm_profile: "synthesis_pro"
+    compression_profile: "compression_fast"
+    compression_threshold: 3000
     query_template: "Welche Erlebnisse habe ich im Kontext von {query} gemacht?"
-    # Nur Typen aus types.yaml
     filter_types: ["experience", "journal", "profile", "person"]
     top_k: 3
     edge_boosts:
@@ -53,8 +61,10 @@ streams_library:
   risk_stream:
     name: "Risiko-Radar"
+    llm_profile: "synthesis_pro"
+    compression_profile: "compression_fast"
+    compression_threshold: 2500
     query_template: "Gefahren, Hindernisse oder Risiken bei: {query}"
-    # Nur Typen aus types.yaml
     filter_types: ["risk", "obstacle", "bias"]
     top_k: 3
     edge_boosts:
@@ -64,81 +74,59 @@ streams_library:
   tech_stream:
     name: "Wissen & Technik"
+    llm_profile: "tech_expert"
+    compression_profile: "compression_fast"
+    compression_threshold: 4500
     query_template: "Inhaltliche Details und Definitionen zu: {query}"
-    # Nur Typen aus types.yaml
     filter_types: ["concept", "source", "glossary", "idea", "insight", "skill", "habit"]
     top_k: 5
     edge_boosts:
       uses: 2.5
       implemented_in: 3.0

-# --- EBENE 2: STRATEGIEN (Komposition & Routing) ---
-# Orchestriert das Zusammenspiel der Streams basierend auf dem Intent.
+# --- EBENE 2: STRATEGIEN (Finale Komposition via MoE-Profile) ---
 strategies:
-  # Spezialisierte Fact-Strategie für zeitliche Fragen
   FACT_WHEN:
     description: "Abfrage von exakten Zeitpunkten und Terminen."
-    preferred_provider: "openrouter"
-    # FAST PATH: Harte Keywords für zeitliche Fragen
+    llm_profile: "synthesis_pro"
     trigger_keywords: ["wann", "datum", "uhrzeit", "zeitpunkt"]
-    use_streams:
-      - "facts_stream"
-      - "biography_stream"
-      - "tech_stream"
+    use_streams: ["facts_stream", "biography_stream", "tech_stream"]
     prompt_template: "fact_synthesis_v1"

-  # Spezialisierte Fact-Strategie für inhaltliche Fragen & Listen
   FACT_WHAT:
     description: "Abfrage von Definitionen, Listen und Inhalten."
-    preferred_provider: "openrouter"
-    # FIX v3.1.6: "projekt" entfernt, um Kollision mit DECISION ("Soll ich Projekt...") zu vermeiden.
+    llm_profile: "synthesis_pro"
     trigger_keywords: ["was ist", "welche sind", "liste", "übersicht", "zusammenfassung"]
-    use_streams:
-      - "facts_stream"
-      - "tech_stream"
-      - "biography_stream"
+    use_streams: ["facts_stream", "tech_stream", "biography_stream"]
     prompt_template: "fact_synthesis_v1"

-  # Entscheidungs-Frage
   DECISION:
     description: "Der User sucht Rat, Strategie oder Abwägung."
-    preferred_provider: "gemini"
-    # FIX v3.1.6: Trigger erweitert, um "Soll ich... Projekt..." sicher zu fangen.
+    llm_profile: "synthesis_pro"
     trigger_keywords: ["soll ich", "sollte ich", "entscheidung", "abwägen", "priorität", "empfehlung"]
-    use_streams:
-      - "values_stream"
-      - "facts_stream"
-      - "risk_stream"
+    use_streams: ["values_stream", "facts_stream", "risk_stream"]
     prompt_template: "decision_synthesis_v1"
     prepend_instruction: |
       !!! ENTSCHEIDUNGS-MODUS (AGENTIC MULTI-STREAM) !!!
       Analysiere die Fakten vor dem Hintergrund meiner Werte und evaluiere die Risiken.
       Wäge ab, ob das Vorhaben mit meiner langfristigen Identität kompatibel ist.

-  # Emotionale Reflexion
   EMPATHY:
     description: "Reaktion auf emotionale Zustände."
-    preferred_provider: "openrouter"
+    llm_profile: "synthesis_pro"
     trigger_keywords: ["fühle", "traurig", "glücklich", "stress", "angst"]
-    use_streams:
-      - "biography_stream"
-      - "values_stream"
+    use_streams: ["biography_stream", "values_stream"]
     prompt_template: "empathy_template"

-  # Technischer Support
   CODING:
     description: "Technische Anfragen und Programmierung."
-    preferred_provider: "gemini"
+    llm_profile: "tech_expert"
     trigger_keywords: ["code", "python", "script", "bug", "syntax"]
-    use_streams:
-      - "tech_stream"
-      - "facts_stream"
+    use_streams: ["tech_stream", "facts_stream"]
     prompt_template: "technical_template"

-  # Eingabe-Modus (WP-07)
   INTERVIEW:
     description: "Der User möchte Wissen erfassen (Eingabemodus)."
-    preferred_provider: "openrouter"
+    llm_profile: "compression_fast"
     use_streams: []
     prompt_template: "interview_template"

--- /dev/null
+++ b/config/llm_profiles.yaml (new file, 31 lines)

@@ -0,0 +1,31 @@
+# config/llm_profiles.yaml
+# VERSION: 1.0.0 (WP-25a: Centralized MoE Profiles)
+# STATUS: Active
+# DESCRIPTION: Zentrale Definition der LLM-Experten-Profile für MindNet.
+
+profiles:
+  # Der "Dampfhammer": Schnell und günstig für Zusammenfassungen
+  compression_fast:
+    provider: "openrouter"
+    model: "google/gemini-flash-1.5"
+    temperature: 0.1
+
+  # Der "Ingenieur": Tiefes Verständnis für Code und Logik
+  tech_expert:
+    provider: "openrouter"
+    model: "anthropic/claude-3-sonnet"
+    temperature: 0.3
+
+  # Der "Wächter": Lokal für sensible Identitäts-Daten
+  identity_safe:
+    provider: "ollama"
+    model: "llama3.1:8b"
+    temperature: 0.2
+
+  # Der "Architekt": Hochwertige Synthese und strategische Abwägung
+  synthesis_pro:
+    provider: "gemini"
+    model: "gemini-1.5-pro"
+    temperature: 0.7
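
To sanity-check a profile outside the service layer, the file can be read the same way LLMService._load_llm_profiles does (a minimal sketch under that assumption; the printed values simply echo the entries above):

from pathlib import Path

import yaml

# Mirrors _load_llm_profiles: read the "profiles" map, falling back to an empty dict.
data = yaml.safe_load(Path("config/llm_profiles.yaml").read_text(encoding="utf-8")) or {}
profiles = data.get("profiles", {})

cfg = profiles.get("tech_expert", {})
print(cfg.get("provider"), cfg.get("model"), cfg.get("temperature"))
# Expected with the file above: openrouter anthropic/claude-3-sonnet 0.3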