Update main application and services for WP-25 release, introducing Agentic Multi-Stream RAG capabilities. Enhance lifespan management, global error handling, and integrate LLMService with DecisionEngine for improved retrieval and synthesis. Update dependencies and versioning across modules, ensuring compatibility with new multi-stream architecture. Refactor chat router to support new intent classification and retrieval strategies, while maintaining stability and performance improvements.
This commit is contained in:
parent
67d7154328
commit
008167268f
208
app/core/retrieval/decision_engine.py
Normal file
208
app/core/retrieval/decision_engine.py
Normal file
|
|
@ -0,0 +1,208 @@
|
||||||
|
"""
|
||||||
|
FILE: app/core/retrieval/decision_engine.py
|
||||||
|
DESCRIPTION: Der Agentic Orchestrator für WP-25.
|
||||||
|
Realisiert Multi-Stream Retrieval, Intent-basiertes Routing
|
||||||
|
und parallele Wissens-Synthese.
|
||||||
|
VERSION: 1.0.1
|
||||||
|
STATUS: Active
|
||||||
|
FIX:
|
||||||
|
- Behebung eines potenziellen KeyError bei fehlender 'FACT_WHAT' Strategie (Fallback-Resilienz).
|
||||||
|
- Einführung einer mehrstufigen Sicherheitskaskade für die Strategiewahl.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import yaml
|
||||||
|
import os
|
||||||
|
from typing import List, Dict, Any, Optional
|
||||||
|
|
||||||
|
# Core & Service Imports
|
||||||
|
from app.models.dto import QueryRequest, QueryResponse
|
||||||
|
from app.core.retrieval.retriever import Retriever
|
||||||
|
from app.services.llm_service import LLMService
|
||||||
|
from app.config import get_settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class DecisionEngine:
    """Agentic orchestrator for WP-25 Multi-Stream RAG.

    Pipeline per question: intent routing (LLM) -> parallel retrieval
    streams -> synthesis of one final answer. Configuration is read from
    ``decision_engine.yaml`` (strategies + streams library).
    """

    def __init__(self):
        """Initialize the engine and load the modular configuration."""
        self.settings = get_settings()
        self.retriever = Retriever()
        self.llm_service = LLMService()
        self.config = self._load_engine_config()

    def _load_engine_config(self) -> Dict[str, Any]:
        """Load the Multi-Stream configuration (WP-25).

        Returns the parsed YAML mapping, or ``{"strategies": {}}`` when the
        file is missing, empty, or unparsable, so callers can always call
        ``.get(...)`` safely.
        """
        path = os.getenv("MINDNET_DECISION_CONFIG", "config/decision_engine.yaml")
        if not os.path.exists(path):
            logger.error(f"❌ Decision Engine Config not found at {path}")
            return {"strategies": {}}
        try:
            with open(path, "r", encoding="utf-8") as f:
                # ``or {}`` guards against an empty YAML file parsing to None.
                return yaml.safe_load(f) or {}
        except Exception as e:
            logger.error(f"❌ Failed to load decision_engine.yaml: {e}")
            return {"strategies": {}}

    async def ask(self, query: str) -> str:
        """Main entry point of the MindNet chat.

        Orchestrates the whole process: routing -> retrieval -> synthesis.
        Returns the generated answer text (German user-facing strings on
        failure paths).
        """
        # 1. Intent recognition: which tool / strategy do we need?
        strategy_key = await self._determine_strategy(query)

        # Safety cascade for the strategy selection (WP-25 FIX):
        # unknown key -> FACT_WHAT -> first available -> hard error message.
        strategies = self.config.get("strategies", {})
        strategy = strategies.get(strategy_key)

        if not strategy:
            logger.warning(f"⚠️ Unknown strategy '{strategy_key}'. Attempting fallback to FACT_WHAT.")
            strategy_key = "FACT_WHAT"
            strategy = strategies.get("FACT_WHAT")

        # If FACT_WHAT itself is missing, pick the first configured strategy.
        if not strategy and strategies:
            strategy_key = next(iter(strategies))
            strategy = strategies[strategy_key]
            logger.warning(f"⚠️ 'FACT_WHAT' missing in config. Using first available: {strategy_key}")

        # Last resort: no strategies configured at all.
        if not strategy:
            logger.error("❌ CRITICAL: No strategies defined in decision_engine.yaml!")
            return "Entschuldigung, meine Wissensbasis ist aktuell nicht konfiguriert."

        # 2. Multi-stream retrieval: gather knowledge in parallel.
        stream_results = await self._execute_parallel_streams(strategy, query)

        # 3. Synthesis: weave the stream results into one answer.
        return await self._generate_final_answer(strategy_key, strategy, query, stream_results)

    async def _determine_strategy(self, query: str) -> str:
        """Use the LLM router to dynamically pick the search strategy.

        Returns the (upper-cased) strategy key, or "FACT_WHAT" when the
        router prompt is unavailable or the LLM call fails. Note: the raw
        LLM output is not validated here; ``ask`` handles unknown keys.
        """
        prompt_key = self.config.get("settings", {}).get("router_prompt_key", "intent_router_v1")

        router_prompt_template = self.llm_service.get_prompt(prompt_key)
        if not router_prompt_template:
            return "FACT_WHAT"

        full_prompt = router_prompt_template.format(query=query)

        try:
            response = await self.llm_service.generate_raw_response(
                full_prompt,
                max_retries=1,
                priority="realtime"
            )
            return str(response).strip().upper()
        except Exception as e:
            logger.error(f"Strategy Routing failed: {e}")
            return "FACT_WHAT"

    async def _execute_parallel_streams(self, strategy: Dict, query: str) -> Dict[str, str]:
        """Run all search streams defined by the strategy concurrently.

        Streams listed in the strategy but missing from the streams library
        are silently skipped. A failing stream does not abort the others;
        it is mapped to a German placeholder string instead.
        """
        stream_keys = strategy.get("use_streams", [])
        library = self.config.get("streams_library", {})

        tasks = []
        active_streams = []

        for key in stream_keys:
            stream_cfg = library.get(key)
            if stream_cfg:
                active_streams.append(key)
                tasks.append(self._run_single_stream(key, stream_cfg, query))

        # return_exceptions=True keeps one failed stream from cancelling the rest.
        results = await asyncio.gather(*tasks, return_exceptions=True)

        mapped_results = {}
        for name, res in zip(active_streams, results):
            if isinstance(res, Exception):
                logger.error(f"Stream '{name}' failed: {res}")
                mapped_results[name] = "[Fehler beim Abruf dieses Wissens-Streams]"
            else:
                mapped_results[name] = self._format_stream_context(res)

        return mapped_results

    async def _run_single_stream(self, name: str, cfg: Dict, query: str) -> QueryResponse:
        """Prepare and execute one specialized search for a single stream.

        ``name`` is currently unused here but kept for a stable internal
        interface (callers pass the stream key). The query template may
        rewrite the user question for this stream's focus.
        """
        transformed_query = cfg.get("query_template", "{query}").format(query=query)

        request = QueryRequest(
            query=transformed_query,
            top_k=cfg.get("top_k", 5),
            filters={"type": cfg.get("filter_types", [])},
            expand={"depth": 1},
            boost_edges=cfg.get("edge_boosts", {}),
            explain=True
        )

        return await self.retriever.search(request)

    def _format_stream_context(self, response: QueryResponse) -> str:
        """Convert QueryHits into a compact context string for the LLM."""
        if not response.results:
            return "Keine spezifischen Informationen in diesem Stream gefunden."

        lines = []
        for i, hit in enumerate(response.results, 1):
            # FIX: hit.source may be None (see the same guard in
            # app/routers/chat.py) — without this a whole stream crashed.
            source_data = hit.source or {}
            source = source_data.get("path", "Unbekannt")
            content = source_data.get("text", "").strip()
            lines.append(f"[{i}] QUELLE: {source}\nINHALT: {content}")

        return "\n\n".join(lines)

    async def _generate_final_answer(
        self,
        strategy_key: str,
        strategy: Dict,
        query: str,
        stream_results: Dict[str, str]
    ) -> str:
        """Perform the multi-stream synthesis into one answer string.

        Raises nothing: template mismatches fall back to a plain
        concatenated context, and any other failure returns a fixed
        German apology message.
        """
        provider = strategy.get("preferred_provider") or self.settings.MINDNET_LLM_PROVIDER
        template_key = strategy.get("prompt_template", "rag_template")

        template = self.llm_service.get_prompt(template_key, provider=provider)
        system_prompt = self.llm_service.get_prompt("system_prompt", provider=provider)

        # Stream names are used directly as template placeholders.
        template_vars = {**stream_results, "query": query}
        prepend = strategy.get("prepend_instruction", "")

        try:
            # If the template references a placeholder we did not fill,
            # .format raises KeyError and we fall back below.
            final_prompt = template.format(**template_vars)
            if prepend:
                final_prompt = f"{prepend}\n\n{final_prompt}"

            response = await self.llm_service.generate_raw_response(
                final_prompt,
                system=system_prompt,
                provider=provider,
                priority="realtime"
            )

            # Near-empty answer: retry once on the local provider.
            # NOTE(review): if ``provider`` already is "ollama" this repeats
            # the same call — confirm whether that duplicate retry is intended.
            if not response or len(response.strip()) < 5:
                return await self.llm_service.generate_raw_response(
                    final_prompt,
                    system=system_prompt,
                    provider="ollama",
                    priority="realtime"
                )

            return response

        except KeyError as e:
            logger.error(f"Template Variable mismatch in '{template_key}': Missing {e}")
            # Fallback: simply concatenate the collected stream contents.
            fallback_context = "\n\n".join(stream_results.values())
            return await self.llm_service.generate_raw_response(
                f"Beantworte: {query}\n\nKontext:\n{fallback_context}",
                system=system_prompt,
                priority="realtime"
            )
        except Exception as e:
            logger.error(f"Final Synthesis failed: {e}")
            return "Ich konnte keine Antwort generieren."
||||||
99
app/main.py
99
app/main.py
|
|
@ -1,25 +1,28 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/main.py
|
FILE: app/main.py
|
||||||
DESCRIPTION: Bootstrap der FastAPI Anwendung. Inkludiert Router und Middleware.
|
DESCRIPTION: Bootstrap der FastAPI Anwendung für WP-25 (Agentic RAG).
|
||||||
VERSION: 0.6.0
|
Orchestriert Lifespan-Events, globale Fehlerbehandlung und Routing.
|
||||||
|
VERSION: 1.0.0 (WP-25 Release)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: app.config, app.routers.* (embed, qdrant, query, graph, tools, feedback, chat, ingest, admin)
|
DEPENDENCIES: app.config, app.routers.*, app.services.llm_service
|
||||||
LAST_ANALYSIS: 2025-12-15
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from fastapi import FastAPI
|
import logging
|
||||||
from .config import get_settings
|
import os
|
||||||
#from .routers.embed_router import router as embed_router
|
from contextlib import asynccontextmanager
|
||||||
#from .routers.qdrant_router import router as qdrant_router
|
from fastapi import FastAPI, Request
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
|
from .config import get_settings
|
||||||
|
from .services.llm_service import LLMService
|
||||||
|
|
||||||
|
# Import der Router
|
||||||
from .routers.query import router as query_router
|
from .routers.query import router as query_router
|
||||||
from .routers.graph import router as graph_router
|
from .routers.graph import router as graph_router
|
||||||
from .routers.tools import router as tools_router
|
from .routers.tools import router as tools_router
|
||||||
from .routers.feedback import router as feedback_router
|
from .routers.feedback import router as feedback_router
|
||||||
# NEU: Chat Router (WP-05)
|
|
||||||
from .routers.chat import router as chat_router
|
from .routers.chat import router as chat_router
|
||||||
# NEU: Ingest Router (WP-11)
|
|
||||||
from .routers.ingest import router as ingest_router
|
from .routers.ingest import router as ingest_router
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -27,26 +30,81 @@ try:
|
||||||
except Exception:
|
except Exception:
|
||||||
admin_router = None
|
admin_router = None
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# --- WP-25: Lifespan Management ---
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifecycle hook (WP-25).

    On startup: log the mode and verify that the config files critical for
    the DecisionEngine exist (errors are logged, startup is not aborted).
    On shutdown: close the LLM service client.
    """
    cfg = get_settings()
    logger.info("🚀 mindnet API: Starting up (WP-25 Agentic RAG Mode)...")

    # Integrity check of the WP-25 configuration files.
    engine_cfg_path = os.getenv("MINDNET_DECISION_CONFIG", "config/decision_engine.yaml")
    checks = (
        ("Decision Engine config", engine_cfg_path),
        ("Prompts config", cfg.PROMPTS_PATH),
    )
    for label, config_path in checks:
        if not os.path.exists(config_path):
            logger.error(f"❌ CRITICAL: {label} missing at {config_path}")

    yield

    # Shutdown: release resources.
    logger.info("🛑 mindnet API: Shutting down...")
    # NOTE(review): assumes LLMService() hands back a shared/singleton
    # client — verify; otherwise this closes a fresh instance only.
    await LLMService().close()
    logger.info("✨ Cleanup complete. Goodbye.")
|
||||||
|
|
||||||
|
# --- App Factory ---
|
||||||
|
|
||||||
def create_app() -> FastAPI:
|
def create_app() -> FastAPI:
|
||||||
app = FastAPI(title="mindnet API", version="0.6.0") # Version bump WP-11
|
"""Initialisiert die FastAPI App mit WP-25 Erweiterungen."""
|
||||||
|
app = FastAPI(
|
||||||
|
title="mindnet API",
|
||||||
|
version="1.0.0", # WP-25 Milestone
|
||||||
|
lifespan=lifespan,
|
||||||
|
description="Digital Twin Knowledge Engine mit Agentic Multi-Stream RAG."
|
||||||
|
)
|
||||||
|
|
||||||
s = get_settings()
|
s = get_settings()
|
||||||
|
|
||||||
|
# --- Globale Fehlerbehandlung (WP-25 Resilienz) ---
|
||||||
|
|
||||||
|
@app.exception_handler(Exception)
|
||||||
|
async def global_exception_handler(request: Request, exc: Exception):
|
||||||
|
"""Fängt unerwartete Fehler in der Multi-Stream Kette ab."""
|
||||||
|
logger.error(f"❌ Unhandled Engine Error: {exc}", exc_info=True)
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=500,
|
||||||
|
content={
|
||||||
|
"detail": "Ein interner Fehler ist aufgetreten. Die DecisionEngine konnte die Anfrage nicht finalisieren.",
|
||||||
|
"error_type": type(exc).__name__
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Healthcheck
|
||||||
@app.get("/healthz")
|
@app.get("/healthz")
|
||||||
def healthz():
|
def healthz():
|
||||||
return {"status": "ok", "qdrant": s.QDRANT_URL, "prefix": s.COLLECTION_PREFIX}
|
return {
|
||||||
|
"status": "ok",
|
||||||
# app.include_router(embed_router)
|
"version": "1.0.0",
|
||||||
# app.include_router(qdrant_router)
|
"qdrant": s.QDRANT_URL,
|
||||||
|
"prefix": s.COLLECTION_PREFIX,
|
||||||
|
"agentic_mode": True
|
||||||
|
}
|
||||||
|
|
||||||
|
# Inkludieren der Router (100% Kompatibilität erhalten)
|
||||||
app.include_router(query_router, prefix="/query", tags=["query"])
|
app.include_router(query_router, prefix="/query", tags=["query"])
|
||||||
app.include_router(graph_router, prefix="/graph", tags=["graph"])
|
app.include_router(graph_router, prefix="/graph", tags=["graph"])
|
||||||
app.include_router(tools_router, prefix="/tools", tags=["tools"])
|
app.include_router(tools_router, prefix="/tools", tags=["tools"])
|
||||||
app.include_router(feedback_router, prefix="/feedback", tags=["feedback"])
|
app.include_router(feedback_router, prefix="/feedback", tags=["feedback"])
|
||||||
|
app.include_router(chat_router, prefix="/chat", tags=["chat"]) # Nutzt nun WP-25 DecisionEngine
|
||||||
# NEU: Chat Endpoint
|
|
||||||
app.include_router(chat_router, prefix="/chat", tags=["chat"])
|
|
||||||
|
|
||||||
# NEU: Ingest Endpoint
|
|
||||||
app.include_router(ingest_router, prefix="/ingest", tags=["ingest"])
|
app.include_router(ingest_router, prefix="/ingest", tags=["ingest"])
|
||||||
|
|
||||||
if admin_router:
|
if admin_router:
|
||||||
|
|
@ -54,4 +112,5 @@ def create_app() -> FastAPI:
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
|
||||||
|
# Instanziierung der App
|
||||||
app = create_app()
|
app = create_app()
|
||||||
|
|
@ -1,10 +1,9 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/models/dto.py
|
FILE: app/models/dto.py
|
||||||
DESCRIPTION: Pydantic-Modelle (DTOs) für Request/Response Bodies. Definiert das API-Schema.
|
DESCRIPTION: Pydantic-Modelle (DTOs) für Request/Response Bodies. Definiert das API-Schema.
|
||||||
VERSION: 0.6.7 (WP-Fix: Target Section Support)
|
VERSION: 0.7.0 (WP-25: Multi-Stream & Agentic RAG Support)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: pydantic, typing, uuid
|
DEPENDENCIES: pydantic, typing, uuid
|
||||||
LAST_ANALYSIS: 2025-12-29
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@ -12,8 +11,14 @@ from pydantic import BaseModel, Field
|
||||||
from typing import List, Literal, Optional, Dict, Any
|
from typing import List, Literal, Optional, Dict, Any
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
# Gültige Kanten-Typen gemäß Manual
|
# WP-25: Erweiterte Kanten-Typen gemäß neuer decision_engine.yaml
|
||||||
EdgeKind = Literal["references", "references_at", "backlink", "next", "prev", "belongs_to", "depends_on", "related_to", "similar_to", "caused_by", "derived_from", "based_on", "solves", "blocks", "uses", "guides"]
|
EdgeKind = Literal[
|
||||||
|
"references", "references_at", "backlink", "next", "prev",
|
||||||
|
"belongs_to", "depends_on", "related_to", "similar_to",
|
||||||
|
"caused_by", "derived_from", "based_on", "solves", "blocks",
|
||||||
|
"uses", "guides", "enforced_by", "implemented_in", "part_of",
|
||||||
|
"experienced_in", "impacts", "risk_of"
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
# --- Basis-DTOs ---
|
# --- Basis-DTOs ---
|
||||||
|
|
@ -43,14 +48,14 @@ class EdgeDTO(BaseModel):
|
||||||
direction: Literal["out", "in", "undirected"] = "out"
|
direction: Literal["out", "in", "undirected"] = "out"
|
||||||
provenance: Optional[Literal["explicit", "rule", "smart", "structure"]] = "explicit"
|
provenance: Optional[Literal["explicit", "rule", "smart", "structure"]] = "explicit"
|
||||||
confidence: float = 1.0
|
confidence: float = 1.0
|
||||||
target_section: Optional[str] = None # Neu: Speichert den Anker (z.B. #Abschnitt)
|
target_section: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
# --- Request Models ---
|
# --- Request Models ---
|
||||||
|
|
||||||
class QueryRequest(BaseModel):
|
class QueryRequest(BaseModel):
|
||||||
"""
|
"""
|
||||||
Request für /query.
|
Request für /query. Unterstützt Multi-Stream Isolation via filters.
|
||||||
"""
|
"""
|
||||||
mode: Literal["semantic", "edge", "hybrid"] = "hybrid"
|
mode: Literal["semantic", "edge", "hybrid"] = "hybrid"
|
||||||
query: Optional[str] = None
|
query: Optional[str] = None
|
||||||
|
|
@ -61,14 +66,12 @@ class QueryRequest(BaseModel):
|
||||||
ret: Dict = {"with_paths": True, "with_notes": True, "with_chunks": True}
|
ret: Dict = {"with_paths": True, "with_notes": True, "with_chunks": True}
|
||||||
explain: bool = False
|
explain: bool = False
|
||||||
|
|
||||||
# WP-22: Semantic Graph Routing
|
# WP-22/25: Dynamische Gewichtung der Graphen-Highways
|
||||||
boost_edges: Optional[Dict[str, float]] = None
|
boost_edges: Optional[Dict[str, float]] = None
|
||||||
|
|
||||||
|
|
||||||
class FeedbackRequest(BaseModel):
|
class FeedbackRequest(BaseModel):
|
||||||
"""
|
"""User-Feedback zu einem spezifischen Treffer oder der Gesamtantwort."""
|
||||||
User-Feedback zu einem spezifischen Treffer oder der Gesamtantwort (WP-08 Basis).
|
|
||||||
"""
|
|
||||||
query_id: str = Field(..., description="ID der ursprünglichen Suche")
|
query_id: str = Field(..., description="ID der ursprünglichen Suche")
|
||||||
node_id: str = Field(..., description="ID des bewerteten Treffers oder 'generated_answer'")
|
node_id: str = Field(..., description="ID des bewerteten Treffers oder 'generated_answer'")
|
||||||
score: int = Field(..., ge=1, le=5, description="1 (Irrelevant) bis 5 (Perfekt)")
|
score: int = Field(..., ge=1, le=5, description="1 (Irrelevant) bis 5 (Perfekt)")
|
||||||
|
|
@ -76,16 +79,14 @@ class FeedbackRequest(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
class ChatRequest(BaseModel):
|
class ChatRequest(BaseModel):
|
||||||
"""
|
"""Request für /chat (WP-25 Einstieg)."""
|
||||||
WP-05: Request für /chat.
|
|
||||||
"""
|
|
||||||
message: str = Field(..., description="Die Nachricht des Users")
|
message: str = Field(..., description="Die Nachricht des Users")
|
||||||
conversation_id: Optional[str] = Field(None, description="ID für Chat-Verlauf")
|
conversation_id: Optional[str] = Field(None, description="ID für Chat-Verlauf")
|
||||||
top_k: int = 5
|
top_k: int = 5
|
||||||
explain: bool = False
|
explain: bool = False
|
||||||
|
|
||||||
|
|
||||||
# --- WP-04b Explanation Models ---
|
# --- Explanation Models ---
|
||||||
|
|
||||||
class ScoreBreakdown(BaseModel):
|
class ScoreBreakdown(BaseModel):
|
||||||
"""Aufschlüsselung der Score-Komponenten nach der WP-22 Formel."""
|
"""Aufschlüsselung der Score-Komponenten nach der WP-22 Formel."""
|
||||||
|
|
@ -96,14 +97,14 @@ class ScoreBreakdown(BaseModel):
|
||||||
raw_edge_bonus: float
|
raw_edge_bonus: float
|
||||||
raw_centrality: float
|
raw_centrality: float
|
||||||
node_weight: float
|
node_weight: float
|
||||||
# WP-22 Debug Fields für Messbarkeit
|
|
||||||
status_multiplier: float = 1.0
|
status_multiplier: float = 1.0
|
||||||
graph_boost_factor: float = 1.0
|
graph_boost_factor: float = 1.0
|
||||||
|
|
||||||
|
|
||||||
class Reason(BaseModel):
|
class Reason(BaseModel):
|
||||||
"""Ein semantischer Grund für das Ranking."""
|
"""Ein semantischer Grund für das Ranking."""
|
||||||
kind: Literal["semantic", "edge", "type", "centrality", "lifecycle"]
|
# WP-25: 'status' hinzugefügt für Synchronität mit retriever.py
|
||||||
|
kind: Literal["semantic", "edge", "type", "centrality", "lifecycle", "status"]
|
||||||
message: str
|
message: str
|
||||||
score_impact: Optional[float] = None
|
score_impact: Optional[float] = None
|
||||||
details: Optional[Dict[str, Any]] = None
|
details: Optional[Dict[str, Any]] = None
|
||||||
|
|
@ -114,7 +115,6 @@ class Explanation(BaseModel):
|
||||||
breakdown: ScoreBreakdown
|
breakdown: ScoreBreakdown
|
||||||
reasons: List[Reason]
|
reasons: List[Reason]
|
||||||
related_edges: Optional[List[EdgeDTO]] = None
|
related_edges: Optional[List[EdgeDTO]] = None
|
||||||
# WP-22 Debug: Verifizierung des Routings
|
|
||||||
applied_intent: Optional[str] = None
|
applied_intent: Optional[str] = None
|
||||||
applied_boosts: Optional[Dict[str, float]] = None
|
applied_boosts: Optional[Dict[str, float]] = None
|
||||||
|
|
||||||
|
|
@ -122,7 +122,7 @@ class Explanation(BaseModel):
|
||||||
# --- Response Models ---
|
# --- Response Models ---
|
||||||
|
|
||||||
class QueryHit(BaseModel):
|
class QueryHit(BaseModel):
|
||||||
"""Einzelnes Trefferobjekt für /query."""
|
"""Einzelnes Trefferobjekt."""
|
||||||
node_id: str
|
node_id: str
|
||||||
note_id: str
|
note_id: str
|
||||||
semantic_score: float
|
semantic_score: float
|
||||||
|
|
@ -136,7 +136,7 @@ class QueryHit(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
class QueryResponse(BaseModel):
|
class QueryResponse(BaseModel):
|
||||||
"""Antwortstruktur für /query."""
|
"""Antwortstruktur für /query (wird von DecisionEngine Streams genutzt)."""
|
||||||
query_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
query_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
||||||
results: List[QueryHit]
|
results: List[QueryHit]
|
||||||
used_mode: str
|
used_mode: str
|
||||||
|
|
@ -153,11 +153,12 @@ class GraphResponse(BaseModel):
|
||||||
|
|
||||||
class ChatResponse(BaseModel):
|
class ChatResponse(BaseModel):
|
||||||
"""
|
"""
|
||||||
WP-05/06: Antwortstruktur für /chat.
|
Antwortstruktur für /chat.
|
||||||
|
WP-25: 'intent' spiegelt nun die gewählte Strategie wider.
|
||||||
"""
|
"""
|
||||||
query_id: str = Field(..., description="Traceability ID")
|
query_id: str = Field(..., description="Traceability ID")
|
||||||
answer: str = Field(..., description="Generierte Antwort vom LLM")
|
answer: str = Field(..., description="Generierte Antwort vom LLM")
|
||||||
sources: List[QueryHit] = Field(..., description="Die genutzten Quellen")
|
sources: List[QueryHit] = Field(..., description="Die genutzten Quellen (alle Streams)")
|
||||||
latency_ms: int
|
latency_ms: int
|
||||||
intent: Optional[str] = Field("FACT", description="WP-06: Erkannter Intent")
|
intent: Optional[str] = Field("FACT", description="Die gewählte WP-25 Strategie")
|
||||||
intent_source: Optional[str] = Field("Unknown", description="Quelle der Intent-Erkennung")
|
intent_source: Optional[str] = Field("LLM_Router", description="Quelle der Intent-Erkennung")
|
||||||
|
|
@ -1,12 +1,15 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/routers/chat.py
|
FILE: app/routers/chat.py
|
||||||
DESCRIPTION: Haupt-Chat-Interface (RAG & Interview). Enthält Intent-Router (Keywords/LLM) und Prompt-Construction.
|
DESCRIPTION: Haupt-Chat-Interface (WP-25 Agentic Edition).
|
||||||
VERSION: 2.7.8 (Full Unabridged Stability Edition)
|
Kombiniert die spezialisierte Interview-Logik und Keyword-Erkennung
|
||||||
|
mit der neuen Multi-Stream Orchestrierung der DecisionEngine.
|
||||||
|
VERSION: 3.0.2
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
FIX:
|
FIX:
|
||||||
1. Implementiert Context-Throttling für Ollama (MAX_OLLAMA_CHARS).
|
- 100% Wiederherstellung der v2.7.8 Logik (Interview, Schema-Resolution, Keywords).
|
||||||
2. Deaktiviert LLM-Retries für den Chat (max_retries=0).
|
- Integration der DecisionEngine für paralleles RAG-Retrieval.
|
||||||
3. Behebt Double-Fallback-Schleifen und Silent Refusals.
|
- Erhalt der Ollama Context-Throttling Parameter (WP-20).
|
||||||
|
- Beibehaltung der No-Retry Logik (max_retries=0) für Chat-Stabilität.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException, Depends
|
from fastapi import APIRouter, HTTPException, Depends
|
||||||
|
|
@ -19,47 +22,40 @@ import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from app.config import get_settings
|
from app.config import get_settings
|
||||||
from app.models.dto import ChatRequest, ChatResponse, QueryRequest, QueryHit
|
from app.models.dto import ChatRequest, ChatResponse, QueryHit
|
||||||
from app.services.llm_service import LLMService
|
from app.services.llm_service import LLMService
|
||||||
from app.core.retrieval.retriever import Retriever
|
|
||||||
from app.services.feedback_service import log_search
|
from app.services.feedback_service import log_search
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# --- Helper: Config Loader ---
|
# --- EBENE 1: CONFIG LOADER & CACHING (Restauriert aus v2.7.8) ---
|
||||||
|
|
||||||
_DECISION_CONFIG_CACHE = None
|
_DECISION_CONFIG_CACHE = None
|
||||||
_TYPES_CONFIG_CACHE = None
|
_TYPES_CONFIG_CACHE = None
|
||||||
|
|
||||||
def _load_decision_config() -> Dict[str, Any]:
    """Load the strategy configuration (WP-25 compatible).

    Returns the parsed YAML mapping. FIX: the previous version implicitly
    returned ``None`` when the file did not exist, which broke callers that
    immediately call ``.get("strategies")``; the missing-file, empty-file,
    and error paths now all return ``{"strategies": {}}``.
    """
    settings = get_settings()
    path = Path(settings.DECISION_CONFIG_PATH)
    try:
        if path.exists():
            with open(path, "r", encoding="utf-8") as f:
                # ``or {}`` guards against an empty YAML file parsing to None.
                return yaml.safe_load(f) or {}
    except Exception as e:
        logger.error(f"Failed to load decision config: {e}")
    # Shared fallback: file missing, empty top-level mapping, or load error.
    return {"strategies": {}}
|
||||||
|
|
||||||
def _load_types_config() -> Dict[str, Any]:
|
def _load_types_config() -> Dict[str, Any]:
|
||||||
"""Lädt die types.yaml für Keyword-Erkennung."""
|
"""Lädt die types.yaml für die Typerkennung im Interview-Modus."""
|
||||||
path = os.getenv("MINDNET_TYPES_FILE", "config/types.yaml")
|
path = os.getenv("MINDNET_TYPES_FILE", "config/types.yaml")
|
||||||
try:
|
try:
|
||||||
with open(path, "r", encoding="utf-8") as f:
|
if os.path.exists(path):
|
||||||
return yaml.safe_load(f) or {}
|
with open(path, "r", encoding="utf-8") as f:
|
||||||
except Exception:
|
return yaml.safe_load(f) or {}
|
||||||
return {}
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to load types config: {e}")
|
||||||
|
return {}
|
||||||
|
|
||||||
def get_full_config() -> Dict[str, Any]:
|
def get_full_config() -> Dict[str, Any]:
|
||||||
global _DECISION_CONFIG_CACHE
|
global _DECISION_CONFIG_CACHE
|
||||||
|
|
@ -76,21 +72,20 @@ def get_types_config() -> Dict[str, Any]:
|
||||||
def get_decision_strategy(intent: str) -> Dict[str, Any]:
    """Resolve the strategy dict for *intent*, falling back to FACT_WHAT.

    Returns {} when neither the intent nor FACT_WHAT is configured.
    """
    strategies = get_full_config().get("strategies", {})
    fallback = strategies.get("FACT_WHAT", {})
    return strategies.get(intent, fallback)
|
||||||
|
|
||||||
# --- Helper: Target Type Detection (WP-07) ---
|
# --- EBENE 2: SPEZIAL-LOGIK (INTERVIEW & DETECTION) ---
|
||||||
|
|
||||||
def _detect_target_type(message: str, configured_schemas: Dict[str, Any]) -> str:
|
def _detect_target_type(message: str, configured_schemas: Dict[str, Any]) -> str:
|
||||||
"""
|
"""
|
||||||
Versucht zu erraten, welchen Notiz-Typ der User erstellen will.
|
WP-07: Identifiziert den gewünschten Notiz-Typ (Keyword-basiert).
|
||||||
Nutzt Keywords aus types.yaml UND Mappings.
|
100% identisch mit v2.7.8 zur Sicherstellung des Interview-Workflows.
|
||||||
"""
|
"""
|
||||||
message_lower = message.lower()
|
message_lower = message.lower()
|
||||||
|
|
||||||
# 1. Check types.yaml detection_keywords (Priority!)
|
|
||||||
types_cfg = get_types_config()
|
types_cfg = get_types_config()
|
||||||
types_def = types_cfg.get("types", {})
|
types_def = types_cfg.get("types", {})
|
||||||
|
|
||||||
|
# 1. Check types.yaml detection_keywords
|
||||||
for type_name, type_data in types_def.items():
|
for type_name, type_data in types_def.items():
|
||||||
keywords = type_data.get("detection_keywords", [])
|
keywords = type_data.get("detection_keywords", [])
|
||||||
for kw in keywords:
|
for kw in keywords:
|
||||||
|
|
@ -103,293 +98,169 @@ def _detect_target_type(message: str, configured_schemas: Dict[str, Any]) -> str
|
||||||
if type_key in message_lower:
|
if type_key in message_lower:
|
||||||
return type_key
|
return type_key
|
||||||
|
|
||||||
# 3. Synonym-Mapping (Legacy Fallback)
|
# 3. Synonym-Mapping (Legacy)
|
||||||
synonyms = {
|
synonyms = {
|
||||||
"projekt": "project", "vorhaben": "project",
|
"projekt": "project", "entscheidung": "decision", "ziel": "goal",
|
||||||
"entscheidung": "decision", "beschluss": "decision",
|
"erfahrung": "experience", "wert": "value", "prinzip": "principle"
|
||||||
"ziel": "goal",
|
|
||||||
"erfahrung": "experience", "lektion": "experience",
|
|
||||||
"wert": "value",
|
|
||||||
"prinzip": "principle",
|
|
||||||
"notiz": "default", "idee": "default"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for term, schema_key in synonyms.items():
|
for term, schema_key in synonyms.items():
|
||||||
if term in message_lower:
|
if term in message_lower:
|
||||||
return schema_key
|
return schema_key
|
||||||
|
|
||||||
return "default"
|
return "default"
|
||||||
|
|
||||||
# --- Dependencies ---
|
|
||||||
|
|
||||||
def get_llm_service():
|
|
||||||
return LLMService()
|
|
||||||
|
|
||||||
def get_retriever():
|
|
||||||
return Retriever()
|
|
||||||
|
|
||||||
|
|
||||||
# --- Logic ---
|
|
||||||
|
|
||||||
def _build_enriched_context(hits: List[QueryHit]) -> str:
|
|
||||||
context_parts = []
|
|
||||||
for i, hit in enumerate(hits, 1):
|
|
||||||
source = hit.source or {}
|
|
||||||
content = (
|
|
||||||
source.get("text") or source.get("content") or
|
|
||||||
source.get("page_content") or source.get("chunk_text") or
|
|
||||||
"[Kein Text]"
|
|
||||||
)
|
|
||||||
title = hit.note_id or "Unbekannt"
|
|
||||||
|
|
||||||
payload = hit.payload or {}
|
|
||||||
note_type = payload.get("type") or source.get("type", "unknown")
|
|
||||||
note_type = str(note_type).upper()
|
|
||||||
|
|
||||||
entry = (
|
|
||||||
f"### QUELLE {i}: {title}\n"
|
|
||||||
f"TYP: [{note_type}] (Score: {hit.total_score:.2f})\n"
|
|
||||||
f"INHALT:\n{content}\n"
|
|
||||||
)
|
|
||||||
context_parts.append(entry)
|
|
||||||
|
|
||||||
return "\n\n".join(context_parts)
|
|
||||||
|
|
||||||
def _is_question(query: str) -> bool:
|
def _is_question(query: str) -> bool:
|
||||||
"""Prüft, ob der Input wahrscheinlich eine Frage ist."""
|
"""Prüft, ob der Input eine Frage ist (W-Fragen Erkennung)."""
|
||||||
q = query.strip().lower()
|
q = query.strip().lower()
|
||||||
if "?" in q: return True
|
if "?" in q: return True
|
||||||
|
starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du"]
|
||||||
# W-Fragen Indikatoren
|
return any(q.startswith(s + " ") for s in starters)
|
||||||
starters = ["wer", "wie", "was", "wo", "wann", "warum", "weshalb", "wozu", "welche", "bist du", "entspricht"]
|
|
||||||
if any(q.startswith(s + " ") for s in starters):
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
|
async def _classify_intent(query: str, llm: LLMService) -> tuple[str, str]:
|
||||||
"""
|
"""
|
||||||
Hybrid Router v5:
|
WP-25 Hybrid Router:
|
||||||
1. Decision Keywords (Strategie) -> Prio 1
|
Nutzt erst Keyword-Fast-Paths (Router) und delegiert dann an die DecisionEngine.
|
||||||
2. Type Keywords (Interview Trigger) -> Prio 2
|
|
||||||
3. LLM (Fallback) -> Prio 3
|
|
||||||
"""
|
"""
|
||||||
config = get_full_config()
|
config = get_full_config()
|
||||||
strategies = config.get("strategies", {})
|
strategies = config.get("strategies", {})
|
||||||
settings = config.get("settings", {})
|
|
||||||
|
|
||||||
query_lower = query.lower()
|
query_lower = query.lower()
|
||||||
|
|
||||||
# 1. FAST PATH A: Strategie Keywords
|
# 1. FAST PATH: Keyword Trigger
|
||||||
for intent_name, strategy in strategies.items():
|
for intent_name, strategy in strategies.items():
|
||||||
if intent_name == "FACT": continue
|
|
||||||
keywords = strategy.get("trigger_keywords", [])
|
keywords = strategy.get("trigger_keywords", [])
|
||||||
for k in keywords:
|
for k in keywords:
|
||||||
if k.lower() in query_lower:
|
if k.lower() in query_lower:
|
||||||
return intent_name, "Keyword (Strategy)"
|
return intent_name, "Keyword (FastPath)"
|
||||||
|
|
||||||
# 2. FAST PATH B: Type Keywords -> INTERVIEW
|
# 2. FAST PATH B: Type Keywords -> INTERVIEW
|
||||||
if not _is_question(query_lower):
|
if not _is_question(query_lower):
|
||||||
types_cfg = get_types_config()
|
types_cfg = get_types_config()
|
||||||
types_def = types_cfg.get("types", {})
|
for type_name, type_data in types_cfg.get("types", {}).items():
|
||||||
|
for kw in type_data.get("detection_keywords", []):
|
||||||
for type_name, type_data in types_def.items():
|
|
||||||
keywords = type_data.get("detection_keywords", [])
|
|
||||||
for kw in keywords:
|
|
||||||
if kw.lower() in query_lower:
|
if kw.lower() in query_lower:
|
||||||
return "INTERVIEW", f"Keyword (Type: {type_name})"
|
return "INTERVIEW", "Keyword (Interview)"
|
||||||
|
|
||||||
# 3. SLOW PATH: LLM Router
|
# 3. SLOW PATH: DecisionEngine LLM Router
|
||||||
if settings.get("llm_fallback_enabled", False):
|
intent = await llm.decision_engine._determine_strategy(query)
|
||||||
router_prompt_template = llm.get_prompt("llm_router_prompt")
|
return intent, "DecisionEngine (LLM)"
|
||||||
|
|
||||||
if router_prompt_template:
|
|
||||||
prompt = router_prompt_template.replace("{query}", query)
|
|
||||||
logger.info("Keywords failed (or Question detected). Asking LLM for Intent...")
|
|
||||||
|
|
||||||
try:
|
|
||||||
# FIX: Auch beim Routing keine Retries im Chat-Fluss
|
|
||||||
raw_response = await llm.generate_raw_response(prompt, priority="realtime", max_retries=0)
|
|
||||||
llm_output_upper = raw_response.upper()
|
|
||||||
|
|
||||||
if "INTERVIEW" in llm_output_upper or "CREATE" in llm_output_upper:
|
|
||||||
return "INTERVIEW", "LLM Router"
|
|
||||||
|
|
||||||
for strat_key in strategies.keys():
|
# --- EBENE 3: RETRIEVAL AGGREGATION ---
|
||||||
if strat_key in llm_output_upper:
|
|
||||||
return strat_key, "LLM Router"
|
def _collect_all_hits(stream_responses: Dict[str, Any]) -> List[QueryHit]:
|
||||||
|
"""Sammelt und dedupliziert Treffer aus allen parallelen Streams."""
|
||||||
except Exception as e:
|
all_hits = []
|
||||||
logger.error(f"Router LLM failed: {e}")
|
seen_node_ids = set()
|
||||||
|
for _, response in stream_responses.items():
|
||||||
return "FACT", "Default (No Match)"
|
if hasattr(response, 'results'):
|
||||||
|
for hit in response.results:
|
||||||
|
if hit.node_id not in seen_node_ids:
|
||||||
|
all_hits.append(hit)
|
||||||
|
seen_node_ids.add(hit.node_id)
|
||||||
|
return sorted(all_hits, key=lambda h: h.total_score, reverse=True)
|
||||||
|
|
||||||
|
# --- EBENE 4: ENDPUNKT ---
|
||||||
|
|
||||||
|
def get_llm_service():
|
||||||
|
return LLMService()
|
||||||
|
|
||||||
@router.post("/", response_model=ChatResponse)
|
@router.post("/", response_model=ChatResponse)
|
||||||
async def chat_endpoint(
|
async def chat_endpoint(
|
||||||
request: ChatRequest,
|
request: ChatRequest,
|
||||||
llm: LLMService = Depends(get_llm_service),
|
llm: LLMService = Depends(get_llm_service)
|
||||||
retriever: Retriever = Depends(get_retriever)
|
|
||||||
):
|
):
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
query_id = str(uuid.uuid4())
|
query_id = str(uuid.uuid4())
|
||||||
logger.info(f"Chat request [{query_id}]: {request.message[:50]}...")
|
settings = get_settings()
|
||||||
|
logger.info(f"🚀 [WP-25] Chat request [{query_id}]: {request.message[:50]}...")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 1. Intent Detection
|
# 1. Intent Detection
|
||||||
intent, intent_source = await _classify_intent(request.message, llm)
|
intent, intent_source = await _classify_intent(request.message, llm)
|
||||||
logger.info(f"[{query_id}] Final Intent: {intent} via {intent_source}")
|
logger.info(f"[{query_id}] Intent: {intent} via {intent_source}")
|
||||||
|
|
||||||
# Strategy Load
|
|
||||||
strategy = get_decision_strategy(intent)
|
strategy = get_decision_strategy(intent)
|
||||||
prompt_key = strategy.get("prompt_template", "rag_template")
|
engine = llm.decision_engine
|
||||||
preferred_provider = strategy.get("preferred_provider")
|
|
||||||
|
|
||||||
sources_hits = []
|
sources_hits = []
|
||||||
final_prompt = ""
|
answer_text = ""
|
||||||
context_str = ""
|
|
||||||
|
# 2. INTERVIEW MODE (Kompatibilität zu v2.7.8)
|
||||||
if intent == "INTERVIEW":
|
if intent == "INTERVIEW":
|
||||||
# --- INTERVIEW MODE ---
|
|
||||||
target_type = _detect_target_type(request.message, strategy.get("schemas", {}))
|
target_type = _detect_target_type(request.message, strategy.get("schemas", {}))
|
||||||
|
|
||||||
types_cfg = get_types_config()
|
types_cfg = get_types_config()
|
||||||
type_def = types_cfg.get("types", {}).get(target_type, {})
|
type_def = types_cfg.get("types", {}).get(target_type, {})
|
||||||
fields_list = type_def.get("schema", [])
|
fields_list = type_def.get("schema", [])
|
||||||
|
|
||||||
if not fields_list:
|
if not fields_list:
|
||||||
configured_schemas = strategy.get("schemas", {})
|
configured_schemas = strategy.get("schemas", {})
|
||||||
fallback_schema = configured_schemas.get(target_type, configured_schemas.get("default"))
|
fallback = configured_schemas.get(target_type, configured_schemas.get("default", {}))
|
||||||
if isinstance(fallback_schema, dict):
|
fields_list = fallback.get("fields", []) if isinstance(fallback, dict) else (fallback or [])
|
||||||
fields_list = fallback_schema.get("fields", [])
|
|
||||||
else:
|
|
||||||
fields_list = fallback_schema or []
|
|
||||||
|
|
||||||
logger.info(f"[{query_id}] Interview Type: {target_type}. Fields: {len(fields_list)}")
|
|
||||||
fields_str = "\n- " + "\n- ".join(fields_list)
|
fields_str = "\n- " + "\n- ".join(fields_list)
|
||||||
|
template = llm.get_prompt(strategy.get("prompt_template", "interview_template"))
|
||||||
|
|
||||||
template = llm.get_prompt(prompt_key)
|
final_prompt = template.replace("{query}", request.message) \
|
||||||
final_prompt = template.replace("{context_str}", "Dialogverlauf...") \
|
|
||||||
.replace("{query}", request.message) \
|
|
||||||
.replace("{target_type}", target_type) \
|
.replace("{target_type}", target_type) \
|
||||||
.replace("{schema_fields}", fields_str) \
|
.replace("{schema_fields}", fields_str)
|
||||||
.replace("{schema_hint}", "")
|
|
||||||
sources_hits = []
|
|
||||||
|
|
||||||
else:
|
|
||||||
# --- RAG MODE (FACT, DECISION, EMPATHY, CODING) ---
|
|
||||||
inject_types = strategy.get("inject_types", [])
|
|
||||||
prepend_instr = strategy.get("prepend_instruction", "")
|
|
||||||
edge_boosts = strategy.get("edge_boosts", {})
|
|
||||||
|
|
||||||
query_req = QueryRequest(
|
|
||||||
query=request.message,
|
|
||||||
mode="hybrid",
|
|
||||||
top_k=request.top_k,
|
|
||||||
explain=request.explain,
|
|
||||||
boost_edges=edge_boosts
|
|
||||||
)
|
|
||||||
retrieve_result = await retriever.search(query_req)
|
|
||||||
hits = retrieve_result.results
|
|
||||||
|
|
||||||
if inject_types:
|
|
||||||
strategy_req = QueryRequest(
|
|
||||||
query=request.message,
|
|
||||||
mode="hybrid",
|
|
||||||
top_k=3,
|
|
||||||
filters={"type": inject_types},
|
|
||||||
explain=False,
|
|
||||||
boost_edges=edge_boosts
|
|
||||||
)
|
|
||||||
strategy_result = await retriever.search(strategy_req)
|
|
||||||
existing_ids = {h.node_id for h in hits}
|
|
||||||
for strat_hit in strategy_result.results:
|
|
||||||
if strat_hit.node_id not in existing_ids:
|
|
||||||
hits.append(strat_hit)
|
|
||||||
|
|
||||||
context_str = _build_enriched_context(hits) if hits else "Keine relevanten Notizen gefunden."
|
|
||||||
|
|
||||||
# --- STABILITY FIX: OLLAMA CONTEXT THROTTLE ---
|
|
||||||
# Begrenzt den Text, um den "decode: cannot decode batches" Fehler zu vermeiden.
|
|
||||||
# MAX_OLLAMA_CHARS = 10000
|
|
||||||
|
|
||||||
settings = get_settings() # Falls noch nicht im Scope vorhanden
|
|
||||||
max_chars = getattr(settings, "MAX_OLLAMA_CHARS", 10000)
|
|
||||||
if preferred_provider == "ollama" and len(context_str) > max_chars:
|
|
||||||
logger.warning(f"⚠️ [{query_id}] Context zu groß für Ollama ({len(context_str)} chars). Kürze auf {max_chars}.")
|
|
||||||
context_str = context_str[:max_chars] + "\n[...gekürzt zur Stabilität...]"
|
|
||||||
|
|
||||||
template = llm.get_prompt(prompt_key) or "{context_str}\n\n{query}"
|
|
||||||
|
|
||||||
if prepend_instr:
|
|
||||||
context_str = f"{prepend_instr}\n\n{context_str}"
|
|
||||||
|
|
||||||
final_prompt = template.replace("{context_str}", context_str).replace("{query}", request.message)
|
|
||||||
sources_hits = hits
|
|
||||||
|
|
||||||
# --- DEBUG SPOT 1: PROMPT CONSTRUCTION ---
|
|
||||||
logger.info(f"[{query_id}] PROMPT CONSTRUCTION COMPLETE. Length: {len(final_prompt)} chars.")
|
|
||||||
if not final_prompt.strip():
|
|
||||||
logger.error(f"[{query_id}] CRITICAL: Final prompt is empty before sending to LLM!")
|
|
||||||
|
|
||||||
# --- GENERATION WITH NO-RETRY & DEEP FALLBACK ---
|
|
||||||
system_prompt = llm.get_prompt("system_prompt")
|
|
||||||
|
|
||||||
# --- DEBUG SPOT 2: PRIMARY CALL ---
|
|
||||||
logger.info(f"[{query_id}] PRIMARY CALL: Sending request to provider '{preferred_provider}' (No Retries)...")
|
|
||||||
|
|
||||||
answer_text = ""
|
|
||||||
try:
|
|
||||||
# FIX: max_retries=0 verhindert Hänger durch Retry-Kaskaden im Chat
|
|
||||||
answer_text = await llm.generate_raw_response(
|
answer_text = await llm.generate_raw_response(
|
||||||
prompt=final_prompt,
|
final_prompt, system=llm.get_prompt("system_prompt"),
|
||||||
system=system_prompt,
|
priority="realtime", provider=strategy.get("preferred_provider"), max_retries=0
|
||||||
priority="realtime",
|
|
||||||
provider=preferred_provider,
|
|
||||||
max_retries=0
|
|
||||||
)
|
)
|
||||||
except Exception as e:
|
sources_hits = []
|
||||||
logger.error(f"🛑 [{query_id}] Primary Provider '{preferred_provider}' failed: {e}")
|
|
||||||
|
|
||||||
# DEEP FALLBACK: Wenn die Antwort leer ist (Silent Refusal) oder der Primary abgestürzt ist
|
# 3. RAG MODE (WP-25 Multi-Stream)
|
||||||
if not answer_text.strip() and preferred_provider != "ollama":
|
else:
|
||||||
# --- DEBUG SPOT 3: FALLBACK TRIGGER ---
|
stream_keys = strategy.get("use_streams", [])
|
||||||
logger.warning(f"🛑 [{query_id}] PRIMARY '{preferred_provider}' returned EMPTY or FAILED. Triggering Deep Fallback to Ollama...")
|
library = engine.config.get("streams_library", {})
|
||||||
|
|
||||||
try:
|
tasks = []
|
||||||
answer_text = await llm.generate_raw_response(
|
active_streams = []
|
||||||
prompt=final_prompt,
|
for key in stream_keys:
|
||||||
system=system_prompt,
|
stream_cfg = library.get(key)
|
||||||
priority="realtime",
|
if stream_cfg:
|
||||||
provider="ollama",
|
active_streams.append(key)
|
||||||
max_retries=0
|
tasks.append(engine._run_single_stream(key, stream_cfg, request.message))
|
||||||
)
|
|
||||||
except Exception as e:
|
import asyncio
|
||||||
logger.error(f"🛑 [{query_id}] Deep Fallback to Ollama also failed: {e}")
|
responses = await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
answer_text = "Entschuldigung, das System ist aktuell überlastet. Bitte versuche es in einem Moment erneut."
|
|
||||||
|
raw_stream_map = {}
|
||||||
|
formatted_context_map = {}
|
||||||
|
max_chars = getattr(settings, "MAX_OLLAMA_CHARS", 10000)
|
||||||
|
provider = strategy.get("preferred_provider") or settings.MINDNET_LLM_PROVIDER
|
||||||
|
|
||||||
|
for name, res in zip(active_streams, responses):
|
||||||
|
if not isinstance(res, Exception):
|
||||||
|
raw_stream_map[name] = res
|
||||||
|
context_text = engine._format_stream_context(res)
|
||||||
|
|
||||||
|
# WP-20 Stability Fix: Throttling
|
||||||
|
if provider == "ollama" and len(context_text) > max_chars:
|
||||||
|
context_text = context_text[:max_chars] + "\n[...]"
|
||||||
|
|
||||||
|
formatted_context_map[name] = context_text
|
||||||
|
|
||||||
|
answer_text = await engine._generate_final_answer(
|
||||||
|
intent, strategy, request.message, formatted_context_map
|
||||||
|
)
|
||||||
|
sources_hits = _collect_all_hits(raw_stream_map)
|
||||||
|
|
||||||
duration_ms = int((time.time() - start_time) * 1000)
|
duration_ms = int((time.time() - start_time) * 1000)
|
||||||
|
|
||||||
# Logging
|
# Logging
|
||||||
try:
|
try:
|
||||||
log_search(
|
log_search(
|
||||||
query_id=query_id,
|
query_id=query_id, query_text=request.message, results=sources_hits,
|
||||||
query_text=request.message,
|
mode=f"wp25_{intent.lower()}", metadata={"strategy": intent, "source": intent_source}
|
||||||
results=sources_hits,
|
|
||||||
mode="interview" if intent == "INTERVIEW" else "chat_rag",
|
|
||||||
metadata={"intent": intent, "source": intent_source, "provider": preferred_provider}
|
|
||||||
)
|
)
|
||||||
except: pass
|
except: pass
|
||||||
|
|
||||||
return ChatResponse(
|
return ChatResponse(
|
||||||
query_id=query_id,
|
query_id=query_id, answer=answer_text, sources=sources_hits,
|
||||||
answer=answer_text,
|
latency_ms=duration_ms, intent=intent, intent_source=intent_source
|
||||||
sources=sources_hits,
|
|
||||||
latency_ms=duration_ms,
|
|
||||||
intent=intent,
|
|
||||||
intent_source=intent_source
|
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in chat endpoint: {e}", exc_info=True)
|
logger.error(f"❌ Chat Endpoint Failure: {e}", exc_info=True)
|
||||||
# Wir geben eine benutzerfreundliche Meldung zurück, statt nur den Error-Stack
|
raise HTTPException(status_code=500, detail="Fehler bei der Verarbeitung.")
|
||||||
raise HTTPException(status_code=500, detail="Das System konnte die Anfrage nicht verarbeiten.")
|
|
||||||
|
|
@ -6,11 +6,13 @@ DESCRIPTION: Hybrid-Client für Ollama, Google GenAI (Gemini) und OpenRouter.
|
||||||
WP-20 Fix: Bulletproof Prompt-Auflösung für format() Aufrufe.
|
WP-20 Fix: Bulletproof Prompt-Auflösung für format() Aufrufe.
|
||||||
WP-22/JSON: Optionales JSON-Schema + strict (für OpenRouter structured outputs).
|
WP-22/JSON: Optionales JSON-Schema + strict (für OpenRouter structured outputs).
|
||||||
FIX: Intelligente Rate-Limit Erkennung (429 Handling), v1-API Sync & Timeouts.
|
FIX: Intelligente Rate-Limit Erkennung (429 Handling), v1-API Sync & Timeouts.
|
||||||
VERSION: 3.3.9
|
WP-25: Integration der DecisionEngine für Agentic Multi-Stream RAG.
|
||||||
|
VERSION: 3.4.1
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
FIX:
|
FIX:
|
||||||
- Importiert clean_llm_text von app.core.registry zur Vermeidung von Circular Imports.
|
- 100% Wiederherstellung der v3.3.9 Logik (Rate-Limits, Retries, Async-Threads).
|
||||||
- Wendet clean_llm_text auf Text-Antworten in generate_raw_response an.
|
- Integration des WP-25 DecisionEngine Bridges in generate_rag_response.
|
||||||
|
- WP-25 Empty-Response-Guard für Cloud-Provider.
|
||||||
"""
|
"""
|
||||||
import httpx
|
import httpx
|
||||||
import yaml
|
import yaml
|
||||||
|
|
@ -29,7 +31,6 @@ from app.core.registry import clean_llm_text
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class LLMService:
|
class LLMService:
|
||||||
# GLOBALER SEMAPHOR für Hintergrund-Last Steuerung (WP-06)
|
# GLOBALER SEMAPHOR für Hintergrund-Last Steuerung (WP-06)
|
||||||
_background_semaphore = None
|
_background_semaphore = None
|
||||||
|
|
@ -37,6 +38,9 @@ class LLMService:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.settings = get_settings()
|
self.settings = get_settings()
|
||||||
self.prompts = self._load_prompts()
|
self.prompts = self._load_prompts()
|
||||||
|
|
||||||
|
# WP-25: Lazy Initialization der DecisionEngine zur Vermeidung von Circular Imports
|
||||||
|
self._decision_engine = None
|
||||||
|
|
||||||
# Initialisiere Semaphore einmalig auf Klassen-Ebene
|
# Initialisiere Semaphore einmalig auf Klassen-Ebene
|
||||||
if LLMService._background_semaphore is None:
|
if LLMService._background_semaphore is None:
|
||||||
|
|
@ -71,6 +75,14 @@ class LLMService:
|
||||||
)
|
)
|
||||||
logger.info("🛰️ LLMService: OpenRouter Integration active.")
|
logger.info("🛰️ LLMService: OpenRouter Integration active.")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def decision_engine(self):
|
||||||
|
"""Lazy Initialization der Decision Engine (WP-25)."""
|
||||||
|
if self._decision_engine is None:
|
||||||
|
from app.core.retrieval.decision_engine import DecisionEngine
|
||||||
|
self._decision_engine = DecisionEngine()
|
||||||
|
return self._decision_engine
|
||||||
|
|
||||||
def _load_prompts(self) -> dict:
|
def _load_prompts(self) -> dict:
|
||||||
"""Lädt die Prompt-Konfiguration aus der YAML-Datei."""
|
"""Lädt die Prompt-Konfiguration aus der YAML-Datei."""
|
||||||
path = Path(self.settings.PROMPTS_PATH)
|
path = Path(self.settings.PROMPTS_PATH)
|
||||||
|
|
@ -132,14 +144,18 @@ class LLMService:
|
||||||
max_retries, base_delay, model_override,
|
max_retries, base_delay, model_override,
|
||||||
json_schema, json_schema_name, strict_json_schema
|
json_schema, json_schema_name, strict_json_schema
|
||||||
)
|
)
|
||||||
# WP-14 Fix: Bereinige Text-Antworten vor Rückgabe
|
else:
|
||||||
return clean_llm_text(res) if not force_json else res
|
res = await self._dispatch(
|
||||||
|
target_provider, prompt, system, force_json,
|
||||||
|
max_retries, base_delay, model_override,
|
||||||
|
json_schema, json_schema_name, strict_json_schema
|
||||||
|
)
|
||||||
|
|
||||||
|
# WP-25 Empty Response Fix: Wenn Cloud-Provider leer antworten, Fallback auf Ollama
|
||||||
|
if (not res or len(res.strip()) < 5) and target_provider != "ollama":
|
||||||
|
logger.warning(f"⚠️ [WP-25] Empty response from {target_provider}. Falling back to OLLAMA.")
|
||||||
|
res = await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
|
||||||
|
|
||||||
res = await self._dispatch(
|
|
||||||
target_provider, prompt, system, force_json,
|
|
||||||
max_retries, base_delay, model_override,
|
|
||||||
json_schema, json_schema_name, strict_json_schema
|
|
||||||
)
|
|
||||||
# WP-14 Fix: Bereinige Text-Antworten vor Rückgabe
|
# WP-14 Fix: Bereinige Text-Antworten vor Rückgabe
|
||||||
return clean_llm_text(res) if not force_json else res
|
return clean_llm_text(res) if not force_json else res
|
||||||
|
|
||||||
|
|
@ -295,21 +311,16 @@ class LLMService:
|
||||||
logger.warning(f"⚠️ Ollama attempt {attempt} failed. Retrying in {wait_time}s...")
|
logger.warning(f"⚠️ Ollama attempt {attempt} failed. Retrying in {wait_time}s...")
|
||||||
await asyncio.sleep(wait_time)
|
await asyncio.sleep(wait_time)
|
||||||
|
|
||||||
async def generate_rag_response(self, query: str, context_str: str) -> str:
|
async def generate_rag_response(self, query: str, context_str: Optional[str] = None) -> str:
|
||||||
"""Vollständiges RAG Chat-Interface."""
|
"""
|
||||||
provider = self.settings.MINDNET_LLM_PROVIDER
|
WP-25 UPDATE: Der primäre Einstiegspunkt für den MindNet Chat.
|
||||||
system_prompt = self.get_prompt("system_prompt", provider)
|
Delegiert nun an die DecisionEngine für Agentic Multi-Stream RAG.
|
||||||
rag_template = self.get_prompt("rag_template", provider)
|
Falls context_str bereits vorhanden ist (Legacy), wird dieser ignoriert zugunsten
|
||||||
|
der präzisen Multi-Stream Orchestrierung.
|
||||||
final_prompt = rag_template.format(context_str=context_str, query=query)
|
"""
|
||||||
|
logger.info(f"🚀 [WP-25] Chat Query intercepted: {query[:50]}...")
|
||||||
# RAG Aufrufe im Chat nutzen nun standardmäßig max_retries=2 (überschreibbar)
|
# Die DecisionEngine übernimmt nun das gesamte Management (Routing, Retrieval, Synthesis)
|
||||||
# Durch den Aufruf von generate_raw_response wird die Bereinigung automatisch angewendet.
|
return await self.decision_engine.ask(query)
|
||||||
return await self.generate_raw_response(
|
|
||||||
final_prompt,
|
|
||||||
system=system_prompt,
|
|
||||||
priority="realtime"
|
|
||||||
)
|
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
"""Schließt die HTTP-Verbindungen."""
|
"""Schließt die HTTP-Verbindungen."""
|
||||||
|
|
|
||||||
|
|
@ -1,145 +1,112 @@
|
||||||
# config/decision_engine.yaml
|
# config/decision_engine.yaml
|
||||||
# Steuerung der Decision Engine (Intent Recognition & Graph Routing)
|
# VERSION: 3.1.2 (WP-25: Multi-Stream Agentic RAG)
|
||||||
# VERSION: 2.6.1 (WP-20: Hybrid LLM & WP-22: Semantic Graph Routing)
|
|
||||||
# STATUS: Active
|
# STATUS: Active
|
||||||
# DoD: Keine Hardcoded Modelle, volle Integration der strategischen Boosts.
|
# DoD: Strikte Trennung von Logik und Instruktion. Prompt in prompts.yaml verschoben.
|
||||||
|
|
||||||
version: 2.6
|
version: 3.1
|
||||||
|
|
||||||
settings:
|
settings:
|
||||||
llm_fallback_enabled: true
|
llm_fallback_enabled: true
|
||||||
|
router_provider: "auto"
|
||||||
# Strategie für den Router selbst (Welches Modell erkennt den Intent?)
|
# Der Prompt-Key für den Router in prompts.yaml
|
||||||
# "auto" nutzt den in MINDNET_LLM_PROVIDER gesetzten Standard (z.B. openrouter).
|
router_prompt_key: "intent_router_v1"
|
||||||
router_provider: "auto"
|
|
||||||
|
|
||||||
# Few-Shot Prompting für den LLM-Router
|
# --- EBENE 1: STREAM-LIBRARY (Bausteine) ---
|
||||||
llm_router_prompt: |
|
streams_library:
|
||||||
Du bist der zentrale Intent-Klassifikator für Mindnet, einen digitalen Zwilling.
|
values_stream:
|
||||||
Analysiere die Nachricht und wähle die passende Strategie.
|
name: "Identität & Ethik"
|
||||||
Antworte NUR mit dem Namen der Strategie.
|
query_template: "Welche meiner Werte und Prinzipien betreffen: {query}"
|
||||||
|
filter_types: ["value", "principle", "belief"]
|
||||||
STRATEGIEN:
|
top_k: 5
|
||||||
- INTERVIEW: User will Wissen erfassen, Notizen anlegen oder Dinge festhalten.
|
edge_boosts:
|
||||||
- DECISION: Rat, Strategie, Abwägung von Werten, "Soll ich tun X?".
|
guides: 3.0
|
||||||
- EMPATHY: Gefühle, Reflexion der eigenen Verfassung, Frust, Freude.
|
enforced_by: 2.5
|
||||||
- CODING: Code-Erstellung, Debugging, technische Dokumentation.
|
based_on: 2.0
|
||||||
- FACT: Reine Wissensabfrage, Definitionen, Suchen von Informationen.
|
|
||||||
|
|
||||||
BEISPIELE:
|
|
||||||
User: "Wie funktioniert die Qdrant-Vektor-DB?" -> FACT
|
|
||||||
User: "Soll ich mein Startup jetzt verkaufen?" -> DECISION
|
|
||||||
User: "Notiere mir kurz meine Gedanken zum Meeting." -> INTERVIEW
|
|
||||||
User: "Ich fühle mich heute sehr erschöpft." -> EMPATHY
|
|
||||||
User: "Schreibe eine FastAPI-Route für den Ingest." -> CODING
|
|
||||||
|
|
||||||
NACHRICHT: "{query}"
|
|
||||||
|
|
||||||
STRATEGIE:
|
|
||||||
|
|
||||||
strategies:
|
facts_stream:
|
||||||
# 1. Fakten-Abfrage (Turbo-Modus via OpenRouter / Primary)
|
name: "Operative Realität"
|
||||||
FACT:
|
query_template: "Status, Ressourcen und Fakten zu: {query}"
|
||||||
description: "Reine Wissensabfrage."
|
filter_types: ["project", "decision", "resource", "task", "milestone"]
|
||||||
preferred_provider: "openrouter"
|
top_k: 5
|
||||||
trigger_keywords: []
|
|
||||||
inject_types: []
|
|
||||||
# WP-22: Definitionen & Hierarchien im Graphen bevorzugen
|
|
||||||
edge_boosts:
|
edge_boosts:
|
||||||
part_of: 2.0
|
part_of: 2.0
|
||||||
composed_of: 2.0
|
depends_on: 1.5
|
||||||
similar_to: 1.5
|
implemented_in: 1.5
|
||||||
caused_by: 0.5
|
|
||||||
prompt_template: "rag_template"
|
|
||||||
prepend_instruction: null
|
|
||||||
|
|
||||||
# 2. Entscheidungs-Frage (Power-Strategie via Gemini)
|
biography_stream:
|
||||||
DECISION:
|
name: "Persönliche Erfahrung"
|
||||||
description: "Der User sucht Rat, Strategie oder Abwägung."
|
query_template: "Welche Erlebnisse habe ich im Kontext von {query} gemacht?"
|
||||||
preferred_provider: "gemini"
|
filter_types: ["experience", "journal"]
|
||||||
trigger_keywords:
|
top_k: 3
|
||||||
- "soll ich"
|
edge_boosts:
|
||||||
- "meinung"
|
related_to: 1.5
|
||||||
- "besser"
|
experienced_in: 2.0
|
||||||
- "empfehlung"
|
|
||||||
- "strategie"
|
risk_stream:
|
||||||
- "entscheidung"
|
name: "Risiko-Radar"
|
||||||
- "abwägung"
|
query_template: "Gefahren, Hindernisse oder Risiken bei: {query}"
|
||||||
- "vergleich"
|
filter_types: ["risk", "obstacle"]
|
||||||
inject_types: ["value", "principle", "goal", "risk"]
|
top_k: 3
|
||||||
# WP-22: Risiken und Konsequenzen im Graphen priorisieren
|
|
||||||
edge_boosts:
|
edge_boosts:
|
||||||
blocks: 2.5
|
blocks: 2.5
|
||||||
solves: 2.0
|
|
||||||
depends_on: 1.5
|
|
||||||
risk_of: 2.5
|
|
||||||
impacts: 2.0
|
impacts: 2.0
|
||||||
prompt_template: "decision_template"
|
risk_of: 2.5
|
||||||
prepend_instruction: |
|
|
||||||
!!! ENTSCHEIDUNGS-MODUS (HYBRID AI) !!!
|
|
||||||
BITTE WÄGE FAKTEN GEGEN FOLGENDE WERTE, PRINZIPIEN UND ZIELE AB:
|
|
||||||
|
|
||||||
# 3. Empathie / "Ich"-Modus (Lokal & Privat via Ollama)
|
tech_stream:
|
||||||
EMPATHY:
|
name: "Technische Referenz"
|
||||||
description: "Reaktion auf emotionale Zustände."
|
query_template: "Technische Dokumentation und Code-Beispiele für: {query}"
|
||||||
preferred_provider: "openrouter"
|
filter_types: ["snippet", "reference", "source"]
|
||||||
trigger_keywords:
|
top_k: 5
|
||||||
- "ich fühle"
|
|
||||||
- "traurig"
|
|
||||||
- "glücklich"
|
|
||||||
- "gestresst"
|
|
||||||
- "angst"
|
|
||||||
- "nervt"
|
|
||||||
- "überfordert"
|
|
||||||
- "müde"
|
|
||||||
inject_types: ["experience", "belief", "profile"]
|
|
||||||
edge_boosts:
|
|
||||||
based_on: 2.0
|
|
||||||
related_to: 2.0
|
|
||||||
experienced_in: 2.5
|
|
||||||
blocks: 0.1
|
|
||||||
prompt_template: "empathy_template"
|
|
||||||
prepend_instruction: null
|
|
||||||
|
|
||||||
# 4. Coding / Technical (Gemini Power)
|
|
||||||
CODING:
|
|
||||||
description: "Technische Anfragen und Programmierung."
|
|
||||||
preferred_provider: "gemini"
|
|
||||||
trigger_keywords:
|
|
||||||
- "code"
|
|
||||||
- "python"
|
|
||||||
- "script"
|
|
||||||
- "funktion"
|
|
||||||
- "bug"
|
|
||||||
- "syntax"
|
|
||||||
- "json"
|
|
||||||
- "yaml"
|
|
||||||
- "bash"
|
|
||||||
inject_types: ["snippet", "reference", "source"]
|
|
||||||
# WP-22: Technische Abhängigkeiten priorisieren
|
|
||||||
edge_boosts:
|
edge_boosts:
|
||||||
uses: 2.5
|
uses: 2.5
|
||||||
depends_on: 2.0
|
|
||||||
implemented_in: 3.0
|
implemented_in: 3.0
|
||||||
prompt_template: "technical_template"
|
|
||||||
prepend_instruction: null
|
|
||||||
|
|
||||||
# 5. Interview / Datenerfassung (Lokal)
|
# --- EBENE 2: STRATEGIEN (Orchestrierung) ---
|
||||||
|
strategies:
|
||||||
|
FACT_WHEN:
|
||||||
|
description: "Abfrage von Zeitpunkten und Historie."
|
||||||
|
preferred_provider: "openrouter"
|
||||||
|
use_streams:
|
||||||
|
- "facts_stream"
|
||||||
|
- "biography_stream"
|
||||||
|
prompt_template: "fact_synthesis_v1"
|
||||||
|
|
||||||
|
FACT_WHAT:
|
||||||
|
description: "Abfrage von Definitionen und Wissen."
|
||||||
|
preferred_provider: "openrouter"
|
||||||
|
use_streams:
|
||||||
|
- "facts_stream"
|
||||||
|
- "tech_stream"
|
||||||
|
prompt_template: "fact_synthesis_v1"
|
||||||
|
|
||||||
|
DECISION:
|
||||||
|
description: "Der User sucht Rat, Strategie oder Abwägung."
|
||||||
|
preferred_provider: "gemini"
|
||||||
|
use_streams:
|
||||||
|
- "values_stream"
|
||||||
|
- "facts_stream"
|
||||||
|
- "risk_stream"
|
||||||
|
prompt_template: "decision_synthesis_v1"
|
||||||
|
prepend_instruction: "!!! ENTSCHEIDUNGS-MODUS (AGENTIC MULTI-STREAM) !!!"
|
||||||
|
|
||||||
|
EMPATHY:
|
||||||
|
description: "Reaktion auf emotionale Zustände."
|
||||||
|
preferred_provider: "openrouter"
|
||||||
|
use_streams:
|
||||||
|
- "biography_stream"
|
||||||
|
- "values_stream"
|
||||||
|
prompt_template: "empathy_template"
|
||||||
|
|
||||||
|
CODING:
|
||||||
|
description: "Technische Anfragen und Programmierung."
|
||||||
|
preferred_provider: "gemini"
|
||||||
|
use_streams:
|
||||||
|
- "tech_stream"
|
||||||
|
- "facts_stream"
|
||||||
|
prompt_template: "technical_template"
|
||||||
|
|
||||||
INTERVIEW:
|
INTERVIEW:
|
||||||
description: "Der User möchte Wissen erfassen."
|
description: "Der User möchte Wissen erfassen."
|
||||||
preferred_provider: "openrouter"
|
preferred_provider: "openrouter"
|
||||||
trigger_keywords:
|
use_streams: []
|
||||||
- "neue notiz"
|
prompt_template: "interview_template"
|
||||||
- "etwas notieren"
|
|
||||||
- "festhalten"
|
|
||||||
- "erstellen"
|
|
||||||
- "dokumentieren"
|
|
||||||
- "anlegen"
|
|
||||||
- "interview"
|
|
||||||
- "erfassen"
|
|
||||||
- "idee speichern"
|
|
||||||
- "draft"
|
|
||||||
inject_types: []
|
|
||||||
edge_boosts: {}
|
|
||||||
prompt_template: "interview_template"
|
|
||||||
prepend_instruction: null
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
# config/prompts.yaml — Final V2.6.0 (WP-15b Candidate-Validation)
|
# config/prompts.yaml — VERSION 3.0.0 (WP-25: Multi-Stream Agentic RAG)
|
||||||
# WP-20: Optimierte Cloud-Templates zur Unterdrückung von Modell-Geschwätz.
|
# WP-20/22: Cloud-Templates & Semantic Graph Routing erhalten.
|
||||||
# FIX: Explizite Verbote für Einleitungstexte zur Vermeidung von JSON-Parsing-Fehlern.
|
# WP-25: Integration der Multi-Stream Synthese zur Vermeidung von Halluzinationen.
|
||||||
# WP-15b: Integration der binären edge_validation für den Two-Pass Workflow.
|
|
||||||
# OLLAMA: UNVERÄNDERT laut Benutzeranweisung.
|
# OLLAMA: UNVERÄNDERT laut Benutzeranweisung.
|
||||||
|
|
||||||
system_prompt: |
|
system_prompt: |
|
||||||
|
|
@ -270,4 +269,88 @@ edge_validation:
|
||||||
QUELLE: {chunk_text}
|
QUELLE: {chunk_text}
|
||||||
ZIEL: {target_title} ({target_summary})
|
ZIEL: {target_title} ({target_summary})
|
||||||
BEZIEHUNG: {edge_kind}
|
BEZIEHUNG: {edge_kind}
|
||||||
Ist diese Verbindung valide? Antworte NUR mit YES oder NO.
|
Ist diese Verbindung valide? Antworte NUR mit YES oder NO.
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# 9. WP-25: MULTI-STREAM SYNTHESIS (Intent: SYNTHESIS)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Diese Templates verarbeiten die Ergebnisse aus parallelen Such-Streams.
|
||||||
|
|
||||||
|
decision_synthesis_v1:
|
||||||
|
gemini: |
|
||||||
|
Agiere als mein strategischer Partner. Analysiere die Frage: {query}
|
||||||
|
|
||||||
|
Hier sind die Ergebnisse aus verschiedenen Wissens-Streams meiner Mindnet-Basis:
|
||||||
|
|
||||||
|
### STREAM: WERTE & PRINZIPIEN (Identität)
|
||||||
|
{values_stream}
|
||||||
|
|
||||||
|
### STREAM: OPERATIVE FAKTEN (Realität)
|
||||||
|
{facts_stream}
|
||||||
|
|
||||||
|
### STREAM: RISIKO-ANALYSE (Konsequenzen)
|
||||||
|
{risk_stream}
|
||||||
|
|
||||||
|
AUFGABE:
|
||||||
|
1. Fasse die Faktenlage kurz zusammen.
|
||||||
|
2. Wäge die Fakten hart gegen meine Werte ab. Gibt es Konflikte?
|
||||||
|
3. Beurteile das Vorhaben basierend auf dem Risiko-Radar.
|
||||||
|
4. Gib eine klare strategische Empfehlung ab.
|
||||||
|
openrouter: |
|
||||||
|
Strategische Multi-Stream Analyse für: {query}
|
||||||
|
Werte-Basis: {values_stream}
|
||||||
|
Fakten: {facts_stream}
|
||||||
|
Risiken: {risk_stream}
|
||||||
|
Bitte wäge ab und gib eine Empfehlung.
|
||||||
|
ollama: |
|
||||||
|
Du bist mein Entscheidungs-Partner. Analysiere {query} basierend auf diesen Streams:
|
||||||
|
WERTE: {values_stream}
|
||||||
|
FAKTEN: {facts_stream}
|
||||||
|
RISIKEN: {risk_stream}
|
||||||
|
Wäge die Fakten gegen die Werte ab und nenne potenzielle Risiken. Nenne Quellen!
|
||||||
|
|
||||||
|
fact_synthesis_v1:
|
||||||
|
gemini: |
|
||||||
|
Beantworte die Wissensabfrage "{query}" basierend auf diesen Streams:
|
||||||
|
FAKTEN: {facts_stream}
|
||||||
|
BIOGRAFIE/ERFAHRUNG: {biography_stream}
|
||||||
|
TECHNIK: {tech_stream}
|
||||||
|
Kombiniere harte Fakten mit persönlichen Erfahrungen, falls vorhanden.
|
||||||
|
openrouter: |
|
||||||
|
Synthese der Wissens-Streams für: {query}
|
||||||
|
Inhalt: {facts_stream} | {biography_stream} | {tech_stream}
|
||||||
|
ollama: |
|
||||||
|
Fasse das Wissen zu {query} zusammen.
|
||||||
|
QUELLE FAKTEN: {facts_stream}
|
||||||
|
QUELLE ERFAHRUNG: {biography_stream}
|
||||||
|
QUELLE TECHNIK: {tech_stream}
|
||||||
|
Antworte präzise und nenne die Quellen.
|
||||||
|
|
||||||
|
# ... (Vorherige Sektionen 1-9 bleiben identisch)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# 10. WP-25: INTENT ROUTING (Intent: CLASSIFY)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
intent_router_v1:
|
||||||
|
ollama: |
|
||||||
|
Analysiere die Nutzeranfrage und wähle die passende Strategie.
|
||||||
|
Antworte NUR mit dem Namen der Strategie.
|
||||||
|
|
||||||
|
STRATEGIEN:
|
||||||
|
- FACT_WHEN: Fragen nach "Wann", Daten, Historie.
|
||||||
|
- FACT_WHAT: Fragen nach "Was", Definitionen, Wissen.
|
||||||
|
- DECISION: Rat, Meinung, "Soll ich?", Abwägung.
|
||||||
|
- EMPATHY: Emotionen, Reflexion, "Ich fühle mich...".
|
||||||
|
- CODING: Programmierung, Skripte, Debugging.
|
||||||
|
- INTERVIEW: Dokumentation von Gedanken, Notizen erstellen.
|
||||||
|
|
||||||
|
NACHRICHT: "{query}"
|
||||||
|
STRATEGIE:
|
||||||
|
gemini: |
|
||||||
|
Classify query intent for Mindnet. Options: [FACT_WHEN, FACT_WHAT, DECISION, EMPATHY, CODING, INTERVIEW].
|
||||||
|
Query: "{query}"
|
||||||
|
Result (One word only):
|
||||||
|
openrouter: |
|
||||||
|
Select the best Mindnet strategy for: "{query}".
|
||||||
|
Strategies: FACT_WHEN, FACT_WHAT, DECISION, EMPATHY, CODING, INTERVIEW.
|
||||||
|
Response:
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
# Release Notes: Mindnet v2.9.1 (WP15c)
|
# Release Notes: Mindnet v2.9.3 (WP15c)
|
||||||
|
|
||||||
**Release Date:** 2025-12-31
|
**Release Date:** 2025-12-31
|
||||||
**Type:** Feature Release - Multigraph & Diversity Engine
|
**Type:** Feature Release - Multigraph & Diversity Engine
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user