WP20 initial

This commit is contained in:
Lars 2025-12-23 14:33:51 +01:00
parent 2d43e0596c
commit 234949800b
7 changed files with 383 additions and 307 deletions

View File

@ -1,10 +1,11 @@
"""
FILE: app/config.py
DESCRIPTION: Zentrale Pydantic-Konfiguration (Env-Vars für Qdrant, LLM, Retriever).
VERSION: 0.4.0
Erweitert um WP-20 Hybrid-Optionen.
VERSION: 0.5.0
STATUS: Active
DEPENDENCIES: os, functools, pathlib
LAST_ANALYSIS: 2025-12-15
LAST_ANALYSIS: 2025-12-23
"""
from __future__ import annotations
import os
@ -12,29 +13,38 @@ from functools import lru_cache
from pathlib import Path
class Settings:
# Qdrant
# Qdrant Verbindung
QDRANT_URL: str = os.getenv("QDRANT_URL", "http://127.0.0.1:6333")
QDRANT_API_KEY: str | None = os.getenv("QDRANT_API_KEY")
COLLECTION_PREFIX: str = os.getenv("MINDNET_PREFIX", "mindnet")
VECTOR_SIZE: int = int(os.getenv("MINDNET_VECTOR_SIZE", "384"))
DISTANCE: str = os.getenv("MINDNET_DISTANCE", "Cosine")
# Embeddings
# Embeddings (lokal)
MODEL_NAME: str = os.getenv("MINDNET_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
# WP-05 LLM / Ollama
# WP-20 Hybrid LLM Provider
# Erlaubt: "ollama" oder "gemini"
MINDNET_LLM_PROVIDER: str = os.getenv("MINDNET_LLM_PROVIDER", "ollama").lower()
GOOGLE_API_KEY: str | None = os.getenv("GOOGLE_API_KEY")
GEMINI_MODEL: str = os.getenv("MINDNET_GEMINI_MODEL", "gemini-1.5-flash")
LLM_FALLBACK_ENABLED: bool = os.getenv("MINDNET_LLM_FALLBACK", "true").lower() == "true"
# WP-05 LLM / Ollama (Local)
OLLAMA_URL: str = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
LLM_MODEL: str = os.getenv("MINDNET_LLM_MODEL", "phi3:mini")
PROMPTS_PATH: str = os.getenv("MINDNET_PROMPTS_PATH", "config/prompts.yaml")
# NEU für WP-06
# WP-06 / WP-14 Performance & Timeouts
LLM_TIMEOUT: float = float(os.getenv("MINDNET_LLM_TIMEOUT", "120.0"))
DECISION_CONFIG_PATH: str = os.getenv("MINDNET_DECISION_CONFIG", "config/decision_engine.yaml")
BACKGROUND_LIMIT: int = int(os.getenv("MINDNET_LLM_BACKGROUND_LIMIT", "2"))
# API
# API & Debugging
DEBUG: bool = os.getenv("DEBUG", "false").lower() == "true"
MINDNET_VAULT_ROOT: str = os.getenv("MINDNET_VAULT_ROOT", "./vault")
# WP-04 Retriever Defaults
# WP-04 Retriever Gewichte (Semantik vs. Graph)
RETRIEVER_W_SEM: float = float(os.getenv("MINDNET_WP04_W_SEM", "0.70"))
RETRIEVER_W_EDGE: float = float(os.getenv("MINDNET_WP04_W_EDGE", "0.25"))
RETRIEVER_W_CENT: float = float(os.getenv("MINDNET_WP04_W_CENT", "0.05"))

View File

@ -1,16 +1,17 @@
"""
FILE: app/core/ingestion.py
DESCRIPTION: Haupt-Ingestion-Logik. Transformiert Markdown in den Graphen (Notes, Chunks, Edges).
FIX: Korrekte Priorisierung von Frontmatter für chunk_profile und retriever_weight.
Lade Chunk-Config basierend auf dem effektiven Profil, nicht nur dem Notiz-Typ.
WP-20: Integration von Smart Edge Allocation via Hybrid LLM (Gemini/Ollama).
WP-22: Integration von Content Lifecycle (Status Gate) und Edge Registry Validation.
WP-22: Kontextsensitive Kanten-Validierung mit Fundort-Reporting (Zeilennummern).
WP-22: Multi-Hash Refresh für konsistente Change Detection.
VERSION: 2.8.6 (WP-22 Lifecycle & Registry)
VERSION: 2.11.0 (WP-20 Full Integration: Hybrid Smart Edges)
STATUS: Active
DEPENDENCIES: app.core.parser, app.core.note_payload, app.core.chunker, app.core.derive_edges, app.core.qdrant*, app.services.embeddings_client, app.services.edge_registry
EXTERNAL_CONFIG: config/types.yaml
DEPENDENCIES: app.core.parser, app.core.note_payload, app.core.chunker, app.services.llm_service, app.services.edge_registry
EXTERNAL_CONFIG: config/types.yaml, config/prompts.yaml
"""
import os
import json
import logging
import asyncio
import time
@ -21,6 +22,7 @@ from app.core.parser import (
read_markdown,
normalize_frontmatter,
validate_required_frontmatter,
extract_edges_with_context, #
)
from app.core.note_payload import make_note_payload
from app.core.chunker import assemble_chunks, get_chunk_config
@ -42,6 +44,7 @@ from app.core.qdrant_points import (
from app.services.embeddings_client import EmbeddingsClient
from app.services.edge_registry import registry as edge_registry
from app.services.llm_service import LLMService #
logger = logging.getLogger(__name__)
@ -66,18 +69,15 @@ def effective_chunk_profile_name(fm: dict, note_type: str, reg: dict) -> str:
Ermittelt den Namen des zu nutzenden Chunk-Profils.
Priorität: 1. Frontmatter Override -> 2. Type Config -> 3. Global Default
"""
# 1. Frontmatter Override
override = fm.get("chunking_profile") or fm.get("chunk_profile")
if override and isinstance(override, str):
return override
# 2. Type Config
t_cfg = reg.get("types", {}).get(note_type, {})
if t_cfg:
cp = t_cfg.get("chunking_profile") or t_cfg.get("chunk_profile")
if cp: return cp
# 3. Global Default
return reg.get("defaults", {}).get("chunking_profile", "sliding_standard")
def effective_retriever_weight(fm: dict, note_type: str, reg: dict) -> float:
@ -85,34 +85,32 @@ def effective_retriever_weight(fm: dict, note_type: str, reg: dict) -> float:
Ermittelt das effektive retriever_weight für das Scoring.
Priorität: 1. Frontmatter Override -> 2. Type Config -> 3. Global Default
"""
# 1. Frontmatter Override
override = fm.get("retriever_weight")
if override is not None:
try: return float(override)
except: pass
# 2. Type Config
t_cfg = reg.get("types", {}).get(note_type, {})
if t_cfg and "retriever_weight" in t_cfg:
return float(t_cfg["retriever_weight"])
# 3. Global Default
return float(reg.get("defaults", {}).get("retriever_weight", 1.0))
class IngestionService:
def __init__(self, collection_prefix: str = None):
env_prefix = os.getenv("COLLECTION_PREFIX", "mindnet")
self.prefix = collection_prefix or env_prefix
from app.config import get_settings
self.settings = get_settings() #
self.prefix = collection_prefix or self.settings.COLLECTION_PREFIX
self.cfg = QdrantConfig.from_env()
self.cfg.prefix = self.prefix
self.client = get_client(self.cfg)
self.dim = self.cfg.dim
self.registry = load_type_registry()
self.dim = self.cfg.VECTOR_SIZE #
self.type_registry = load_type_registry()
self.embedder = EmbeddingsClient()
self.llm = LLMService() #
# Change Detection Modus (full oder body)
self.active_hash_mode = os.getenv("MINDNET_CHANGE_DETECTION_MODE", "full")
try:
@ -122,8 +120,8 @@ class IngestionService:
logger.warning(f"DB init warning: {e}")
def _get_chunk_config_by_profile(self, profile_name: str, note_type: str) -> Dict[str, Any]:
"""Holt die Chunker-Parameter (max, target, overlap) für ein spezifisches Profil."""
profiles = self.registry.get("chunking_profiles", {})
"""Holt die Chunker-Parameter für ein spezifisches Profil."""
profiles = self.type_registry.get("chunking_profiles", {})
if profile_name in profiles:
cfg = profiles[profile_name].copy()
if "overlap" in cfg and isinstance(cfg["overlap"], list):
@ -131,6 +129,43 @@ class IngestionService:
return cfg
return get_chunk_config(note_type)
async def _perform_smart_edge_allocation(self, text: str, note_id: str) -> List[Dict]:
"""
WP-20: Nutzt den Hybrid LLM Service für die semantische Kanten-Extraktion.
Verwendet provider-spezifische Prompts aus der config.
"""
provider = self.settings.MINDNET_LLM_PROVIDER #
# Prompt-Lookup (Fallback auf Standard-Struktur falls Key fehlt)
prompt_data = self.llm.prompts.get("edge_extraction", {})
if isinstance(prompt_data, dict):
template = prompt_data.get(provider, prompt_data.get("ollama", ""))
else:
template = str(prompt_data)
if not template:
template = "Extrahiere semantische Relationen aus: {text}. Antworte als JSON: [{\"to\": \"X\", \"kind\": \"Y\"}]"
prompt = template.format(text=text[:6000], note_id=note_id)
try:
# Nutzt die Semaphore für Hintergrund-Tasks
response_json = await self.llm.generate_raw_response(
prompt=prompt,
priority="background",
force_json=True
)
data = json.loads(response_json)
# Anreicherung mit Provenance-Metadaten für WP-22 Registry
for item in data:
item["provenance"] = "semantic_ai"
item["line"] = f"ai-{provider}"
return data
except Exception as e:
logger.warning(f"Smart Edge Allocation skipped for {note_id}: {e}")
return []
async def process_file(
self,
file_path: str,
@ -142,10 +177,7 @@ class IngestionService:
hash_source: str = "parsed",
hash_normalize: str = "canonical"
) -> Dict[str, Any]:
"""
Verarbeitet eine Markdown-Datei und schreibt sie in den Graphen.
Folgt dem 14-Schritte-Workflow.
"""
"""Verarbeitet eine Markdown-Datei und schreibt sie in den Graphen."""
result = {"path": file_path, "status": "skipped", "changed": False, "error": None}
# 1. Parse & Frontmatter Validation
@ -158,42 +190,29 @@ class IngestionService:
logger.error(f"Validation failed for {file_path}: {e}")
return {**result, "error": f"Validation failed: {str(e)}"}
# --- WP-22: Content Lifecycle Gate (Teil A) ---
# --- WP-22: Content Lifecycle Gate ---
status = fm.get("status", "draft").lower().strip()
# Hard Skip für System- oder Archiv-Dateien
if status in ["system", "template", "archive", "hidden"]:
logger.info(f"Skipping file {file_path} (Status: {status})")
return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"}
# 2. Type & Config Resolution
note_type = resolve_note_type(fm.get("type"), self.registry)
note_type = resolve_note_type(fm.get("type"), self.type_registry)
fm["type"] = note_type
effective_profile = effective_chunk_profile_name(fm, note_type, self.registry)
effective_weight = effective_retriever_weight(fm, note_type, self.registry)
effective_profile = effective_chunk_profile_name(fm, note_type, self.type_registry)
effective_weight = effective_retriever_weight(fm, note_type, self.type_registry)
fm["chunk_profile"] = effective_profile
fm["retriever_weight"] = effective_weight
# 3. Build Note Payload (Inkl. Multi-Hash für WP-22)
try:
note_pl = make_note_payload(
parsed,
vault_root=vault_root,
hash_normalize=hash_normalize,
hash_source=hash_source,
file_path=file_path
)
# Text Body Fallback
note_pl = make_note_payload(parsed, vault_root=vault_root, hash_normalize=hash_normalize, hash_source=hash_source, file_path=file_path)
if not note_pl.get("fulltext"): note_pl["fulltext"] = getattr(parsed, "body", "") or ""
# Sicherstellen der effektiven Werte im Payload
note_pl["retriever_weight"] = effective_weight
note_pl["chunk_profile"] = effective_profile
# WP-22: Status speichern
note_pl["status"] = status
note_id = note_pl["note_id"]
except Exception as e:
logger.error(f"Payload build failed: {e}")
@ -205,15 +224,12 @@ class IngestionService:
old_payload = self._fetch_note_payload(note_id)
has_old = old_payload is not None
# Prüfung gegen den aktuell konfigurierten Hash-Modus (body oder full)
check_key = f"{self.active_hash_mode}:{hash_source}:{hash_normalize}"
old_hashes = (old_payload or {}).get("hashes")
if isinstance(old_hashes, dict): old_hash = old_hashes.get(check_key)
elif isinstance(old_hashes, str) and self.active_hash_mode == "body": old_hash = old_hashes
else: old_hash = None
old_hashes = (old_payload or {}).get("hashes", {})
old_hash = old_hashes.get(check_key) if isinstance(old_hashes, dict) else None
new_hash = note_pl.get("hashes", {}).get(check_key)
hash_changed = (old_hash != new_hash)
chunks_missing, edges_missing = self._artifacts_missing(note_id)
@ -228,49 +244,38 @@ class IngestionService:
# 5. Processing (Chunking, Embedding, Edge Generation)
try:
body_text = getattr(parsed, "body", "") or ""
edge_registry.ensure_latest() #
# Konfiguration für das spezifische Profil laden
chunk_config = self._get_chunk_config_by_profile(effective_profile, note_type)
chunks = await assemble_chunks(fm["id"], body_text, fm["type"], config=chunk_config)
# Chunks mit Metadaten anreichern
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
vecs = []
if chunk_pls:
texts = [c.get("window") or c.get("text") or "" for c in chunk_pls]
try:
if hasattr(self.embedder, 'embed_documents'):
vecs = await self.embedder.embed_documents(texts)
else:
for t in texts:
v = await self.embedder.embed_query(t)
vecs.append(v)
except Exception as e:
logger.error(f"Embedding failed: {e}")
raise RuntimeError(f"Embedding failed: {e}")
# Kanten generieren
try:
raw_edges = build_edges_for_note(
note_id,
chunk_pls,
note_level_references=note_pl.get("references", []),
include_note_scope_refs=note_scope_refs
)
except TypeError:
raw_edges = build_edges_for_note(note_id, chunk_pls)
# --- WP-22: Edge Registry Validation (Teil B) ---
# --- WP-22: Kanten-Extraktion & Validierung ---
edges = []
if raw_edges:
for edge in raw_edges:
original_kind = edge.get("kind", "related_to")
# Normalisierung über die Registry (Alias-Auflösung)
canonical_kind = edge_registry.resolve(original_kind)
edge["kind"] = canonical_kind
edges.append(edge)
context = {"file": file_path, "note_id": note_id}
# A. Explizite User-Kanten mit Zeilennummern
explicit_edges = extract_edges_with_context(parsed)
for e in explicit_edges:
e["kind"] = edge_registry.resolve(edge_type=e["kind"], provenance="explicit", context={**context, "line": e.get("line")})
edges.append(e)
# B. WP-20: Smart AI Edges (Hybrid Acceleration)
ai_edges = await self._perform_smart_edge_allocation(body_text, note_id)
for e in ai_edges:
e["kind"] = edge_registry.resolve(edge_type=e.get("kind"), provenance="semantic_ai", context={**context, "line": e.get("line")})
edges.append(e)
# C. System-Kanten (Struktur: belongs_to, next, prev)
raw_system_edges = build_edges_for_note(note_id, chunk_pls, note_level_references=note_pl.get("references", []))
for e in raw_system_edges:
e["kind"] = edge_registry.resolve(edge_type=e.get("kind", "belongs_to"), provenance="structure", context={**context, "line": "system"})
if e["kind"]: edges.append(e)
except Exception as e:
logger.error(f"Processing failed: {e}", exc_info=True)
@ -278,31 +283,23 @@ class IngestionService:
# 6. Upsert in Qdrant
try:
# Alte Fragmente löschen, um "Geister-Chunks" zu vermeiden
if purge_before and has_old:
self._purge_artifacts(note_id)
# Note Metadaten
n_name, n_pts = points_for_note(self.prefix, note_pl, None, self.dim)
upsert_batch(self.client, n_name, n_pts)
# Chunks (Vektoren)
if chunk_pls and vecs:
c_name, c_pts = points_for_chunks(self.prefix, chunk_pls, vecs)
upsert_batch(self.client, c_name, c_pts)
# Kanten
if edges:
e_name, e_pts = points_for_edges(self.prefix, edges)
upsert_batch(self.client, e_name, e_pts)
return {
"path": file_path,
"status": "success",
"changed": True,
"note_id": note_id,
"chunks_count": len(chunk_pls),
"edges_count": len(edges)
"path": file_path, "status": "success", "changed": True, "note_id": note_id,
"chunks_count": len(chunk_pls), "edges_count": len(edges)
}
except Exception as e:
logger.error(f"Upsert failed: {e}", exc_info=True)
@ -319,19 +316,17 @@ class IngestionService:
except: return None
def _artifacts_missing(self, note_id: str) -> Tuple[bool, bool]:
"""Prüft, ob Chunks oder Kanten für eine Note fehlen (Integritätscheck)."""
"""Prüft, ob Chunks oder Kanten für eine Note fehlen."""
from qdrant_client.http import models as rest
c_col = f"{self.prefix}_chunks"
e_col = f"{self.prefix}_edges"
try:
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
c_pts, _ = self.client.scroll(collection_name=c_col, scroll_filter=f, limit=1)
e_pts, _ = self.client.scroll(collection_name=e_col, scroll_filter=f, limit=1)
c_pts, _ = self.client.scroll(collection_name=f"{self.prefix}_chunks", scroll_filter=f, limit=1)
e_pts, _ = self.client.scroll(collection_name=f"{self.prefix}_edges", scroll_filter=f, limit=1)
return (not bool(c_pts)), (not bool(e_pts))
except: return True, True
def _purge_artifacts(self, note_id: str):
"""Löscht alle Chunks und Edges einer Note (vor dem Neu-Schreiben)."""
"""Löscht alle Chunks und Edges einer Note."""
from qdrant_client.http import models as rest
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
selector = rest.FilterSelector(filter=f)
@ -341,7 +336,7 @@ class IngestionService:
except Exception: pass
async def create_from_text(self, markdown_content: str, filename: str, vault_root: str, folder: str = "00_Inbox") -> Dict[str, Any]:
"""Hilfsmethode zur Erstellung einer Note aus einem Textstream (Editor-Save)."""
"""Hilfsmethode zur Erstellung einer Note aus einem Textstream."""
target_dir = os.path.join(vault_root, folder)
os.makedirs(target_dir, exist_ok=True)
file_path = os.path.join(target_dir, filename)
@ -351,7 +346,6 @@ class IngestionService:
f.flush()
os.fsync(f.fileno())
await asyncio.sleep(0.1)
logger.info(f"Written file to {file_path}")
except Exception as e:
return {"status": "error", "error": f"Disk write failed: {str(e)}"}
return await self.process_file(file_path=file_path, vault_root=vault_root, apply=True, force_replace=True, purge_before=True)

View File

@ -2,7 +2,8 @@
FILE: app/routers/ingest.py
DESCRIPTION: Endpunkte für WP-11. Nimmt Markdown entgegen.
Refactored für WP-14: Nutzt BackgroundTasks für non-blocking Save.
VERSION: 0.7.0 (Fix: Timeout WP-14)
Update WP-20: Unterstützung für Hybrid-Cloud-Analyse Feedback.
VERSION: 0.8.0 (WP-20 Hybrid Ready)
STATUS: Active
DEPENDENCIES: app.core.ingestion, app.services.discovery, fastapi, pydantic
"""
@ -44,6 +45,7 @@ class SaveResponse(BaseModel):
async def run_ingestion_task(markdown_content: str, filename: str, vault_root: str, folder: str):
"""
Führt die Ingestion im Hintergrund aus, damit der Request nicht blockiert.
Integrierter WP-20 Hybrid-Modus über den IngestionService.
"""
logger.info(f"🔄 Background Task started: Ingesting {filename}...")
try:
@ -80,15 +82,17 @@ async def analyze_draft(req: AnalyzeRequest):
async def save_note(req: SaveRequest, background_tasks: BackgroundTasks):
"""
WP-14 Fix: Startet Ingestion im Hintergrund (Fire & Forget).
Verhindert Timeouts bei aktiver Smart-Edge-Allocation (WP-15).
Verhindert Timeouts bei aktiver Smart-Edge-Allocation (WP-15) und Cloud-Hybrid-Modus (WP-20).
"""
try:
vault_root = os.getenv("MINDNET_VAULT_ROOT", "./vault")
abs_vault_root = os.path.abspath(vault_root)
if not os.path.exists(abs_vault_root):
try: os.makedirs(abs_vault_root, exist_ok=True)
except: pass
try:
os.makedirs(abs_vault_root, exist_ok=True)
except Exception as e:
logger.warning(f"Could not create vault root: {e}")
final_filename = req.filename or f"draft_{int(time.time())}.md"
@ -109,7 +113,7 @@ async def save_note(req: SaveRequest, background_tasks: BackgroundTasks):
status="queued",
file_path=os.path.join(req.folder, final_filename),
note_id="pending",
message="Speicherung & KI-Analyse im Hintergrund gestartet.",
message="Speicherung & Hybrid-KI-Analyse (WP-20) im Hintergrund gestartet.",
stats={
"chunks": -1, # Indikator für Async
"edges": -1

View File

@ -1,11 +1,11 @@
"""
FILE: app/services/llm_service.py
DESCRIPTION: Asynchroner Client für Ollama. Verwaltet Prompts und Background-Last (Semaphore).
VERSION: 2.8.0
DESCRIPTION: Hybrid-Client für Ollama & Google Gemini.
Verwaltet Prompts, Background-Last (Semaphore) und Cloud-Routing.
VERSION: 3.1.0 (WP-20 Full Integration: Provider-Aware Prompting)
STATUS: Active
DEPENDENCIES: httpx, yaml, asyncio, app.config
DEPENDENCIES: httpx, yaml, asyncio, google-generativeai, app.config
EXTERNAL_CONFIG: config/prompts.yaml
LAST_ANALYSIS: 2025-12-15
"""
import httpx
@ -13,47 +13,47 @@ import yaml
import logging
import os
import asyncio
import json
import google.generativeai as genai
from pathlib import Path
from typing import Optional, Dict, Any, Literal
from app.config import get_settings
logger = logging.getLogger(__name__)
class Settings:
OLLAMA_URL = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
LLM_TIMEOUT = float(os.getenv("MINDNET_LLM_TIMEOUT", 300.0))
LLM_MODEL = os.getenv("MINDNET_LLM_MODEL", "phi3:mini")
PROMPTS_PATH = os.getenv("MINDNET_PROMPTS_PATH", "./config/prompts.yaml")
# NEU: Konfigurierbares Limit für Hintergrund-Last
# Default auf 2 (konservativ), kann in .env erhöht werden.
BACKGROUND_LIMIT = int(os.getenv("MINDNET_LLM_BACKGROUND_LIMIT", "2"))
def get_settings():
return Settings()
class LLMService:
# GLOBALER SEMAPHOR (Lazy Initialization)
# Wir initialisieren ihn erst, wenn wir die Settings kennen.
# GLOBALER SEMAPHOR für Hintergrund-Last Steuerung (WP-06 / WP-20)
_background_semaphore = None
def __init__(self):
self.settings = get_settings()
self.prompts = self._load_prompts()
# Initialisiere Semaphore einmalig auf Klassen-Ebene basierend auf Config
# Initialisiere Semaphore einmalig auf Klassen-Ebene
if LLMService._background_semaphore is None:
limit = self.settings.BACKGROUND_LIMIT
limit = getattr(self.settings, "BACKGROUND_LIMIT", 2)
logger.info(f"🚦 LLMService: Initializing Background Semaphore with limit: {limit}")
LLMService._background_semaphore = asyncio.Semaphore(limit)
# Ollama Setup
self.timeout = httpx.Timeout(self.settings.LLM_TIMEOUT, connect=10.0)
self.client = httpx.AsyncClient(
self.ollama_client = httpx.AsyncClient(
base_url=self.settings.OLLAMA_URL,
timeout=self.timeout
)
# Gemini Setup [WP-20]
if hasattr(self.settings, "GOOGLE_API_KEY") and self.settings.GOOGLE_API_KEY:
genai.configure(api_key=self.settings.GOOGLE_API_KEY)
model_name = getattr(self.settings, "GEMINI_MODEL", "gemini-1.5-flash")
self.gemini_model = genai.GenerativeModel(model_name)
logger.info(f"✨ LLMService: Gemini Cloud Mode active ({model_name})")
else:
self.gemini_model = None
logger.warning("⚠️ LLMService: No GOOGLE_API_KEY found. Gemini mode disabled.")
def _load_prompts(self) -> dict:
"""Lädt die Prompt-Konfiguration aus der YAML-Datei."""
path = Path(self.settings.PROMPTS_PATH)
if not path.exists(): return {}
try:
@ -62,31 +62,74 @@ class LLMService:
logger.error(f"Failed to load prompts: {e}")
return {}
def get_prompt(self, key: str, provider: str = None) -> str:
"""
Wählt das Template basierend auf dem Provider aus (WP-20).
Unterstützt sowohl flache Strings als auch Dictionary-basierte Provider-Zweige.
"""
active_provider = provider or getattr(self.settings, "MINDNET_LLM_PROVIDER", "ollama")
data = self.prompts.get(key, "")
if isinstance(data, dict):
# Versuche den Provider-Key, Fallback auf 'ollama'
return data.get(active_provider, data.get("ollama", ""))
return str(data)
async def generate_raw_response(
self,
prompt: str,
system: str = None,
force_json: bool = False,
max_retries: int = 0,
max_retries: int = 2,
base_delay: float = 2.0,
priority: Literal["realtime", "background"] = "realtime"
priority: Literal["realtime", "background"] = "realtime",
provider: Optional[str] = None
) -> str:
"""
Führt einen LLM Call aus.
priority="realtime": Chat (Sofort, keine Bremse).
priority="background": Import/Analyse (Gedrosselt durch Semaphore).
Führt einen LLM Call aus mit Priority-Handling und Provider-Wahl.
"""
# Bestimme Provider: Parameter-Override > Config-Default
target_provider = provider or getattr(self.settings, "MINDNET_LLM_PROVIDER", "ollama")
use_semaphore = (priority == "background")
if use_semaphore and LLMService._background_semaphore:
async with LLMService._background_semaphore:
return await self._execute_request(prompt, system, force_json, max_retries, base_delay)
return await self._dispatch_request(target_provider, prompt, system, force_json, max_retries, base_delay)
else:
# Realtime oder Fallback (falls Semaphore Init fehlschlug)
return await self._execute_request(prompt, system, force_json, max_retries, base_delay)
return await self._dispatch_request(target_provider, prompt, system, force_json, max_retries, base_delay)
async def _execute_request(self, prompt, system, force_json, max_retries, base_delay):
async def _dispatch_request(self, provider, prompt, system, force_json, max_retries, base_delay):
"""Routet die Anfrage an den gewählten Provider mit Fallback-Logik."""
try:
if provider == "gemini" and self.gemini_model:
return await self._execute_gemini(prompt, system, force_json)
else:
return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
except Exception as e:
# Automatischer Fallback auf Ollama bei Cloud-Fehlern (WP-20)
if provider == "gemini" and getattr(self.settings, "LLM_FALLBACK_ENABLED", True):
logger.warning(f"🔄 Gemini failed: {e}. Falling back to Ollama.")
return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
raise e
async def _execute_gemini(self, prompt, system, force_json) -> str:
"""Asynchroner Google Gemini Call (WP-20)."""
full_prompt = f"System: {system}\n\nUser: {prompt}" if system else prompt
# Gemini JSON Mode Support
gen_config = {}
if force_json:
gen_config["response_mime_type"] = "application/json"
response = await self.gemini_model.generate_content_async(
full_prompt,
generation_config=gen_config
)
return response.text.strip()
async def _execute_ollama(self, prompt, system, force_json, max_retries, base_delay) -> str:
"""Ollama Call mit exponentieller Backoff-Retry-Logik."""
payload: Dict[str, Any] = {
"model": self.settings.LLM_MODEL,
"prompt": prompt,
@ -96,52 +139,47 @@ class LLMService:
"num_ctx": 8192
}
}
if force_json:
payload["format"] = "json"
if system:
payload["system"] = system
if force_json: payload["format"] = "json"
if system: payload["system"] = system
attempt = 0
while True:
try:
response = await self.client.post("/api/generate", json=payload)
response = await self.ollama_client.post("/api/generate", json=payload)
if response.status_code == 200:
data = response.json()
return data.get("response", "").strip()
else:
return response.json().get("response", "").strip()
response.raise_for_status()
except Exception as e:
attempt += 1
if attempt > max_retries:
logger.error(f"LLM Final Error (Versuch {attempt}): {e}")
logger.error(f"Ollama Error after {attempt} retries: {e}")
raise e
# Exponentieller Backoff: base_delay * (2 ^ (attempt - 1))
wait_time = base_delay * (2 ** (attempt - 1))
logger.warning(f"⚠️ LLM Retry ({attempt}/{max_retries}) in {wait_time}s: {e}")
logger.warning(f"⚠️ Ollama attempt {attempt} failed. Retrying in {wait_time}s...")
await asyncio.sleep(wait_time)
async def generate_rag_response(self, query: str, context_str: str) -> str:
"""
Chat-Wrapper: Immer Realtime.
"""
system_prompt = self.prompts.get("system_prompt", "")
rag_template = self.prompts.get("rag_template", "{context_str}\n\n{query}")
"""Standard RAG Chat-Interface mit Provider-spezifischen Templates."""
provider = getattr(self.settings, "MINDNET_LLM_PROVIDER", "ollama")
# Holen der Templates über die neue get_prompt Methode
system_prompt = self.get_prompt("system_prompt", provider)
rag_template = self.get_prompt("rag_template", provider)
# Fallback für RAG Template Struktur
if not rag_template:
rag_template = "{context_str}\n\n{query}"
final_prompt = rag_template.format(context_str=context_str, query=query)
return await self.generate_raw_response(
final_prompt,
system=system_prompt,
max_retries=0,
force_json=False,
priority="realtime"
)
async def close(self):
if self.client:
await self.client.aclose()
"""Schließt alle offenen HTTP-Verbindungen."""
if self.ollama_client:
await self.ollama_client.aclose()

View File

@ -1,31 +1,37 @@
# config/decision_engine.yaml
# Steuerung der Decision Engine (Intent Recognition & Graph Routing)
# Version: 2.5.0 (WP-22: Semantic Graph Routing)
# VERSION: 2.6.0 (WP-20: Hybrid LLM & WP-22: Semantic Graph Routing)
# STATUS: Active
version: 2.5
version: 2.6
settings:
llm_fallback_enabled: true
# Strategie für den Router selbst (Welches Modell erkennt den Intent?)
# "auto" nutzt den in MINDNET_LLM_PROVIDER gesetzten Standard.
router_provider: "auto"
# Few-Shot Prompting für den LLM-Router (Slow Path)
# Gemini 1.5 nutzt diesen Kontext für hochpräzise Intent-Erkennung.
llm_router_prompt: |
Du bist ein Klassifikator. Analysiere die Nachricht und wähle die passende Strategie.
Du bist der zentrale Intent-Klassifikator für Mindnet, einen digitalen Zwilling.
Analysiere die Nachricht und wähle die passende Strategie.
Antworte NUR mit dem Namen der Strategie.
STRATEGIEN:
- INTERVIEW: User will Wissen erfassen, Notizen anlegen oder Dinge festhalten.
- DECISION: Rat, Strategie, Vor/Nachteile, "Soll ich".
- EMPATHY: Gefühle, Frust, Freude, Probleme.
- CODING: Code, Syntax, Programmierung.
- FACT: Wissen, Fakten, Definitionen.
- DECISION: Rat, Strategie, Abwägung von Werten, "Soll ich tun X?".
- EMPATHY: Gefühle, Reflexion der eigenen Verfassung, Frust, Freude.
- CODING: Code-Erstellung, Debugging, technische Dokumentation.
- FACT: Reine Wissensabfrage, Definitionen, Suchen von Informationen.
BEISPIELE:
User: "Wie funktioniert Qdrant?" -> FACT
User: "Soll ich Qdrant nutzen?" -> DECISION
User: "Ich möchte etwas notieren" -> INTERVIEW
User: "Lass uns das festhalten" -> INTERVIEW
User: "Schreibe ein Python Script" -> CODING
User: "Alles ist grau und sinnlos" -> EMPATHY
User: "Wie funktioniert die Qdrant-Vektor-DB?" -> FACT
User: "Soll ich mein Startup jetzt verkaufen?" -> DECISION
User: "Notiere mir kurz meine Gedanken zum Meeting." -> INTERVIEW
User: "Ich fühle mich heute sehr erschöpft." -> EMPATHY
User: "Schreibe eine FastAPI-Route für den Ingest." -> CODING
NACHRICHT: "{query}"
@ -35,6 +41,7 @@ strategies:
# 1. Fakten-Abfrage (Fallback & Default)
FACT:
description: "Reine Wissensabfrage."
preferred_provider: "ollama" # Schnell und lokal ausreichend
trigger_keywords: []
inject_types: []
# WP-22: Definitionen & Hierarchien bevorzugen
@ -46,9 +53,10 @@ strategies:
prompt_template: "rag_template"
prepend_instruction: null
# 2. Entscheidungs-Frage
# 2. Entscheidungs-Frage (Power-Strategie)
DECISION:
description: "Der User sucht Rat, Strategie oder Abwägung."
preferred_provider: "gemini" # Nutzt Gemini's Reasoning-Power für WP-20
trigger_keywords:
- "soll ich"
- "meinung"
@ -68,12 +76,13 @@ strategies:
impacts: 2.0 # NEU: Zeige mir alles, was von dieser Entscheidung betroffen ist!
prompt_template: "decision_template"
prepend_instruction: |
!!! ENTSCHEIDUNGS-MODUS !!!
!!! ENTSCHEIDUNGS-MODUS (HYBRID AI) !!!
BITTE WÄGE FAKTEN GEGEN FOLGENDE WERTE, PRINZIPIEN UND ZIELE AB:
# 3. Empathie / "Ich"-Modus
# 3. Empathie / "Ich"-Modus (Privacy-Fokus)
EMPATHY:
description: "Reaktion auf emotionale Zustände."
preferred_provider: "ollama" # Private Emotionen bleiben lokal!
trigger_keywords:
- "ich fühle"
- "traurig"
@ -96,6 +105,7 @@ strategies:
# 4. Coding / Technical
CODING:
description: "Technische Anfragen und Programmierung."
preferred_provider: "gemini" # Höheres Weltwissen für moderne Libraries
trigger_keywords:
- "code"
- "python"
@ -116,10 +126,9 @@ strategies:
prepend_instruction: null
# 5. Interview / Datenerfassung
# HINWEIS: Spezifische Typen (Projekt, Ziel etc.) werden automatisch
# über die types.yaml erkannt. Hier stehen nur generische Trigger.
INTERVIEW:
description: "Der User möchte Wissen erfassen."
preferred_provider: "ollama" # Lokale Erfassung ist performant genug
trigger_keywords:
- "neue notiz"
- "etwas notieren"
@ -135,8 +144,7 @@ strategies:
edge_boosts: {}
prompt_template: "interview_template"
prepend_instruction: null
# Schemas: Hier nur der Fallback.
# Spezifische Schemas (Project, Experience) kommen jetzt aus types.yaml!
# Schemas kommen aus types.yaml (WP-22)
schemas:
default:
fields:

View File

@ -15,7 +15,8 @@ system_prompt: |
# ---------------------------------------------------------
# 1. STANDARD: Fakten & Wissen (Intent: FACT)
# ---------------------------------------------------------
rag_template: |
rag_template:
ollama: |
QUELLEN (WISSEN):
=========================================
{context_str}
@ -27,11 +28,16 @@ rag_template: |
ANWEISUNG:
Beantworte die Frage präzise basierend auf den Quellen.
Fasse die Informationen zusammen. Sei objektiv und neutral.
gemini: |
Analysiere diesen Kontext meines digitalen Zwillings:
{context_str}
Beantworte die Anfrage detailliert und prüfe auf Widersprüche: {query}
# ---------------------------------------------------------
# 2. DECISION: Strategie & Abwägung (Intent: DECISION)
# ---------------------------------------------------------
decision_template: |
decision_template:
ollama: |
KONTEXT (FAKTEN & STRATEGIE):
=========================================
{context_str}
@ -50,11 +56,15 @@ decision_template: |
- **Analyse:** (Kurze Zusammenfassung der Fakten)
- **Abgleich:** (Gibt es Konflikte mit Werten/Zielen? Nenne die Quelle!)
- **Empfehlung:** (Klare Meinung: Ja/Nein/Vielleicht mit Begründung)
gemini: |
Agierte als Senior Strategy Consultant. Nutze den Kontext {context_str}, um die Frage {query}
tiefgreifend gegen meine langfristigen Ziele abzuwägen.
# ---------------------------------------------------------
# 3. EMPATHY: Der Spiegel / "Ich"-Modus (Intent: EMPATHY)
# ---------------------------------------------------------
empathy_template: |
empathy_template:
ollama: |
KONTEXT (ERFAHRUNGEN & GLAUBENSSÄTZE):
=========================================
{context_str}
@ -75,7 +85,8 @@ empathy_template: |
# ---------------------------------------------------------
# 4. TECHNICAL: Der Coder (Intent: CODING)
# ---------------------------------------------------------
technical_template: |
technical_template:
ollama: |
KONTEXT (DOCS & SNIPPETS):
=========================================
{context_str}
@ -97,7 +108,8 @@ technical_template: |
# ---------------------------------------------------------
# 5. INTERVIEW: Der "One-Shot Extractor" (Performance Mode)
# ---------------------------------------------------------
interview_template: |
interview_template:
ollama: |
TASK:
Du bist ein professioneller Ghostwriter. Verwandle den "USER INPUT" in eine strukturierte Notiz vom Typ '{target_type}'.
@ -135,7 +147,8 @@ interview_template: |
# ---------------------------------------------------------
# 6. EDGE_ALLOCATION: Kantenfilter (Intent: OFFLINE_FILTER)
# ---------------------------------------------------------
edge_allocation_template: |
edge_allocation_template:
ollama: |
TASK:
Du bist ein strikter Selektor. Du erhältst eine Liste von "Kandidaten-Kanten" (Strings).
Wähle jene aus, die inhaltlich im "Textabschnitt" vorkommen oder relevant sind.
@ -160,3 +173,9 @@ edge_allocation_template: |
Output: ["blocks:Projekt Alpha", "needs:Budget"]
DEIN OUTPUT (JSON):
gemini: |
Extrahiere semantische Kanten für den Graphen ({note_id}).
Finde auch implizite Verbindungen.
JSON: [{"to": "X", "kind": "Y", "reason": "Z"}].
TEXT: {text}

View File

@ -35,3 +35,6 @@ streamlit>=1.39.0
# Visualization (Parallelbetrieb)
streamlit-agraph>=0.0.45
st-cytoscape
# Google gemini API
google-generativeai>=0.8.3