WP20 initial

2025-12-23 14:33:51 +01:00 · 2025-12-23 14:33:51 +01:00 · 234949800b
commit 234949800b
parent 2d43e0596c
7 changed files with 383 additions and 307 deletions
--- a/app/config.py
+++ b/app/config.py
@ -1,10 +1,11 @@
 """
 FILE: app/config.py
 DESCRIPTION: Zentrale Pydantic-Konfiguration (Env-Vars für Qdrant, LLM, Retriever).
-VERSION: 0.4.0
+             Erweitert um WP-20 Hybrid-Optionen.
 VERSION: 0.5.0
 STATUS: Active
 DEPENDENCIES: os, functools, pathlib
-LAST_ANALYSIS: 2025-12-15
+LAST_ANALYSIS: 2025-12-23
 """
 from __future__ import annotations
 import os
@ -12,29 +13,38 @@ from functools import lru_cache
 from pathlib import Path
 class Settings:
-    # Qdrant
+    # Qdrant Verbindung
    QDRANT_URL: str = os.getenv("QDRANT_URL", "http://127.0.0.1:6333")
    QDRANT_API_KEY: str | None = os.getenv("QDRANT_API_KEY")
    COLLECTION_PREFIX: str = os.getenv("MINDNET_PREFIX", "mindnet")
    VECTOR_SIZE: int = int(os.getenv("MINDNET_VECTOR_SIZE", "384"))
    DISTANCE: str = os.getenv("MINDNET_DISTANCE", "Cosine")
-    # Embeddings
+    # Embeddings (lokal)
    MODEL_NAME: str = os.getenv("MINDNET_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
-    # WP-05 LLM / Ollama
+    # WP-20 Hybrid LLM Provider
    # Erlaubt: "ollama" oder "gemini"
    MINDNET_LLM_PROVIDER: str = os.getenv("MINDNET_LLM_PROVIDER", "ollama").lower()
    GOOGLE_API_KEY: str | None = os.getenv("GOOGLE_API_KEY")
    GEMINI_MODEL: str = os.getenv("MINDNET_GEMINI_MODEL", "gemini-1.5-flash")
    LLM_FALLBACK_ENABLED: bool = os.getenv("MINDNET_LLM_FALLBACK", "true").lower() == "true"
    # WP-05 LLM / Ollama (Local)
    OLLAMA_URL: str = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
    LLM_MODEL: str = os.getenv("MINDNET_LLM_MODEL", "phi3:mini") 
    PROMPTS_PATH: str = os.getenv("MINDNET_PROMPTS_PATH", "config/prompts.yaml")
-    # NEU für WP-06
+    # WP-06 / WP-14 Performance & Timeouts
    LLM_TIMEOUT: float = float(os.getenv("MINDNET_LLM_TIMEOUT", "120.0"))
    DECISION_CONFIG_PATH: str = os.getenv("MINDNET_DECISION_CONFIG", "config/decision_engine.yaml")
    BACKGROUND_LIMIT: int = int(os.getenv("MINDNET_LLM_BACKGROUND_LIMIT", "2"))
-    # API
+    # API & Debugging
    DEBUG: bool = os.getenv("DEBUG", "false").lower() == "true"
    MINDNET_VAULT_ROOT: str = os.getenv("MINDNET_VAULT_ROOT", "./vault")
-    # WP-04 Retriever Defaults
+    # WP-04 Retriever Gewichte (Semantik vs. Graph)
    RETRIEVER_W_SEM: float = float(os.getenv("MINDNET_WP04_W_SEM", "0.70"))
    RETRIEVER_W_EDGE: float = float(os.getenv("MINDNET_WP04_W_EDGE", "0.25"))
    RETRIEVER_W_CENT: float = float(os.getenv("MINDNET_WP04_W_CENT", "0.05"))
--- a/app/core/ingestion.py
+++ b/app/core/ingestion.py
@ -1,16 +1,17 @@
 """
 FILE: app/core/ingestion.py
 DESCRIPTION: Haupt-Ingestion-Logik. Transformiert Markdown in den Graphen (Notes, Chunks, Edges).
-FIX: Korrekte Priorisierung von Frontmatter für chunk_profile und retriever_weight.
+             WP-20: Integration von Smart Edge Allocation via Hybrid LLM (Gemini/Ollama).
     Lade Chunk-Config basierend auf dem effektiven Profil, nicht nur dem Notiz-Typ.
             WP-22: Integration von Content Lifecycle (Status Gate) und Edge Registry Validation.
             WP-22: Kontextsensitive Kanten-Validierung mit Fundort-Reporting (Zeilennummern).
             WP-22: Multi-Hash Refresh für konsistente Change Detection.
-VERSION: 2.8.6 (WP-22 Lifecycle & Registry)
+VERSION: 2.11.0 (WP-20 Full Integration: Hybrid Smart Edges)
 STATUS: Active
-DEPENDENCIES: app.core.parser, app.core.note_payload, app.core.chunker, app.core.derive_edges, app.core.qdrant*, app.services.embeddings_client, app.services.edge_registry
+DEPENDENCIES: app.core.parser, app.core.note_payload, app.core.chunker, app.services.llm_service, app.services.edge_registry
-EXTERNAL_CONFIG: config/types.yaml
+EXTERNAL_CONFIG: config/types.yaml, config/prompts.yaml
 """
 import os
 import json
 import logging
 import asyncio
 import time
@ -21,6 +22,7 @@ from app.core.parser import (
    read_markdown,
    normalize_frontmatter,
    validate_required_frontmatter,
    extract_edges_with_context,  #
 )
 from app.core.note_payload import make_note_payload
 from app.core.chunker import assemble_chunks, get_chunk_config
@ -42,6 +44,7 @@ from app.core.qdrant_points import (
 from app.services.embeddings_client import EmbeddingsClient
 from app.services.edge_registry import registry as edge_registry
 from app.services.llm_service import LLMService  #
 logger = logging.getLogger(__name__)
@ -66,18 +69,15 @@ def effective_chunk_profile_name(fm: dict, note_type: str, reg: dict) -> str:
    Ermittelt den Namen des zu nutzenden Chunk-Profils.
    Priorität: 1. Frontmatter Override -> 2. Type Config -> 3. Global Default
    """
    # 1. Frontmatter Override
    override = fm.get("chunking_profile") or fm.get("chunk_profile")
    if override and isinstance(override, str):
        return override
    # 2. Type Config
    t_cfg = reg.get("types", {}).get(note_type, {})
    if t_cfg:
        cp = t_cfg.get("chunking_profile") or t_cfg.get("chunk_profile")
        if cp: return cp
    # 3. Global Default
    return reg.get("defaults", {}).get("chunking_profile", "sliding_standard")
 def effective_retriever_weight(fm: dict, note_type: str, reg: dict) -> float:
@ -85,34 +85,32 @@ def effective_retriever_weight(fm: dict, note_type: str, reg: dict) -> float:
    Ermittelt das effektive retriever_weight für das Scoring.
    Priorität: 1. Frontmatter Override -> 2. Type Config -> 3. Global Default
    """
    # 1. Frontmatter Override
    override = fm.get("retriever_weight")
    if override is not None:
        try: return float(override)
        except: pass
    # 2. Type Config
    t_cfg = reg.get("types", {}).get(note_type, {})
    if t_cfg and "retriever_weight" in t_cfg:
        return float(t_cfg["retriever_weight"])
    # 3. Global Default
    return float(reg.get("defaults", {}).get("retriever_weight", 1.0))
 class IngestionService:
    def __init__(self, collection_prefix: str = None):
-        env_prefix = os.getenv("COLLECTION_PREFIX", "mindnet")
+        from app.config import get_settings
-        self.prefix = collection_prefix or env_prefix
+        self.settings = get_settings()  #
        self.prefix = collection_prefix or self.settings.COLLECTION_PREFIX
        self.cfg = QdrantConfig.from_env()
        self.cfg.prefix = self.prefix 
        self.client = get_client(self.cfg)
-        self.dim = self.cfg.dim
+        self.dim = self.cfg.VECTOR_SIZE  #
-        self.registry = load_type_registry()
+        self.type_registry = load_type_registry()
        self.embedder = EmbeddingsClient()
        self.llm = LLMService()  #
        # Change Detection Modus (full oder body)
        self.active_hash_mode = os.getenv("MINDNET_CHANGE_DETECTION_MODE", "full")
        try:
@ -122,8 +120,8 @@ class IngestionService:
            logger.warning(f"DB init warning: {e}")
    def _get_chunk_config_by_profile(self, profile_name: str, note_type: str) -> Dict[str, Any]:
-        """Holt die Chunker-Parameter (max, target, overlap) für ein spezifisches Profil."""
+        """Holt die Chunker-Parameter für ein spezifisches Profil."""
-        profiles = self.registry.get("chunking_profiles", {})
+        profiles = self.type_registry.get("chunking_profiles", {})
        if profile_name in profiles:
            cfg = profiles[profile_name].copy()
            if "overlap" in cfg and isinstance(cfg["overlap"], list): 
@ -131,6 +129,43 @@ class IngestionService:
            return cfg
        return get_chunk_config(note_type)
    async def _perform_smart_edge_allocation(self, text: str, note_id: str) -> List[Dict]:
        """
        WP-20: Nutzt den Hybrid LLM Service für die semantische Kanten-Extraktion.
        Verwendet provider-spezifische Prompts aus der config.
        """
        provider = self.settings.MINDNET_LLM_PROVIDER  #
        # Prompt-Lookup (Fallback auf Standard-Struktur falls Key fehlt)
        prompt_data = self.llm.prompts.get("edge_extraction", {})
        if isinstance(prompt_data, dict):
            template = prompt_data.get(provider, prompt_data.get("ollama", ""))
        else:
            template = str(prompt_data)
        if not template:
            template = "Extrahiere semantische Relationen aus: {text}. Antworte als JSON: [{\"to\": \"X\", \"kind\": \"Y\"}]"
        prompt = template.format(text=text[:6000], note_id=note_id)
        try:
            # Nutzt die Semaphore für Hintergrund-Tasks
            response_json = await self.llm.generate_raw_response(
                prompt=prompt,
                priority="background",
                force_json=True
            )
            data = json.loads(response_json)
            # Anreicherung mit Provenance-Metadaten für WP-22 Registry
            for item in data:
                item["provenance"] = "semantic_ai"
                item["line"] = f"ai-{provider}"
            return data
        except Exception as e:
            logger.warning(f"Smart Edge Allocation skipped for {note_id}: {e}")
            return []
    async def process_file(
        self, 
        file_path: str, 
@ -142,10 +177,7 @@ class IngestionService:
        hash_source: str = "parsed",
        hash_normalize: str = "canonical"
    ) -> Dict[str, Any]:
-        """
+        """Verarbeitet eine Markdown-Datei und schreibt sie in den Graphen."""
        Verarbeitet eine Markdown-Datei und schreibt sie in den Graphen.
        Folgt dem 14-Schritte-Workflow.
        """
        result = {"path": file_path, "status": "skipped", "changed": False, "error": None}
        # 1. Parse & Frontmatter Validation
@ -158,42 +190,29 @@ class IngestionService:
            logger.error(f"Validation failed for {file_path}: {e}")
            return {**result, "error": f"Validation failed: {str(e)}"}
-        # --- WP-22: Content Lifecycle Gate (Teil A) ---
+        # --- WP-22: Content Lifecycle Gate ---
        status = fm.get("status", "draft").lower().strip()
        # Hard Skip für System- oder Archiv-Dateien
        if status in ["system", "template", "archive", "hidden"]:
            logger.info(f"Skipping file {file_path} (Status: {status})")
            return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"}
        # 2. Type & Config Resolution
-        note_type = resolve_note_type(fm.get("type"), self.registry)
+        note_type = resolve_note_type(fm.get("type"), self.type_registry)
        fm["type"] = note_type
-        effective_profile = effective_chunk_profile_name(fm, note_type, self.registry)
+        effective_profile = effective_chunk_profile_name(fm, note_type, self.type_registry)
-        effective_weight = effective_retriever_weight(fm, note_type, self.registry)
+        effective_weight = effective_retriever_weight(fm, note_type, self.type_registry)
        fm["chunk_profile"] = effective_profile 
        fm["retriever_weight"] = effective_weight
        # 3. Build Note Payload (Inkl. Multi-Hash für WP-22)
        try:
-            note_pl = make_note_payload(
+            note_pl = make_note_payload(parsed, vault_root=vault_root, hash_normalize=hash_normalize, hash_source=hash_source, file_path=file_path)
                parsed,
                vault_root=vault_root,
                hash_normalize=hash_normalize,
                hash_source=hash_source,
                file_path=file_path
            )
            # Text Body Fallback
            if not note_pl.get("fulltext"): note_pl["fulltext"] = getattr(parsed, "body", "") or ""
            # Sicherstellen der effektiven Werte im Payload
            note_pl["retriever_weight"] = effective_weight
            note_pl["chunk_profile"] = effective_profile
            # WP-22: Status speichern
            note_pl["status"] = status
            note_id = note_pl["note_id"]
        except Exception as e:
             logger.error(f"Payload build failed: {e}")
@ -205,15 +224,12 @@ class IngestionService:
            old_payload = self._fetch_note_payload(note_id)
        has_old = old_payload is not None
        # Prüfung gegen den aktuell konfigurierten Hash-Modus (body oder full)
        check_key = f"{self.active_hash_mode}:{hash_source}:{hash_normalize}"
-        old_hashes = (old_payload or {}).get("hashes")
+        old_hashes = (old_payload or {}).get("hashes", {})
-        if isinstance(old_hashes, dict): old_hash = old_hashes.get(check_key)
+        old_hash = old_hashes.get(check_key) if isinstance(old_hashes, dict) else None
        elif isinstance(old_hashes, str) and self.active_hash_mode == "body": old_hash = old_hashes
        else: old_hash = None
        new_hash = note_pl.get("hashes", {}).get(check_key)
        hash_changed = (old_hash != new_hash)
        chunks_missing, edges_missing = self._artifacts_missing(note_id)
@ -228,49 +244,38 @@ class IngestionService:
        # 5. Processing (Chunking, Embedding, Edge Generation)
        try:
            body_text = getattr(parsed, "body", "") or ""
            edge_registry.ensure_latest()  #
            # Konfiguration für das spezifische Profil laden
            chunk_config = self._get_chunk_config_by_profile(effective_profile, note_type)
            chunks = await assemble_chunks(fm["id"], body_text, fm["type"], config=chunk_config)
            # Chunks mit Metadaten anreichern
            chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
            vecs = []
            if chunk_pls:
                texts = [c.get("window") or c.get("text") or "" for c in chunk_pls]
                try:
                    if hasattr(self.embedder, 'embed_documents'):
                vecs = await self.embedder.embed_documents(texts)
                    else:
                        for t in texts:
                            v = await self.embedder.embed_query(t)
                            vecs.append(v)
                except Exception as e:
                    logger.error(f"Embedding failed: {e}")
                    raise RuntimeError(f"Embedding failed: {e}")
-            # Kanten generieren
+            # --- WP-22: Kanten-Extraktion & Validierung ---
            try:
                raw_edges = build_edges_for_note(
                    note_id, 
                    chunk_pls, 
                    note_level_references=note_pl.get("references", []),
                    include_note_scope_refs=note_scope_refs
                )
            except TypeError:
                raw_edges = build_edges_for_note(note_id, chunk_pls)
            # --- WP-22: Edge Registry Validation (Teil B) ---
            edges = []
-            if raw_edges:
+            context = {"file": file_path, "note_id": note_id}
-                for edge in raw_edges:
+
-                    original_kind = edge.get("kind", "related_to")
+            # A. Explizite User-Kanten mit Zeilennummern
-                    # Normalisierung über die Registry (Alias-Auflösung)
+            explicit_edges = extract_edges_with_context(parsed)
-                    canonical_kind = edge_registry.resolve(original_kind)
+            for e in explicit_edges:
-                    edge["kind"] = canonical_kind
+                e["kind"] = edge_registry.resolve(edge_type=e["kind"], provenance="explicit", context={**context, "line": e.get("line")})
-                    edges.append(edge)
+                edges.append(e)
            # B. WP-20: Smart AI Edges (Hybrid Acceleration)
            ai_edges = await self._perform_smart_edge_allocation(body_text, note_id)
            for e in ai_edges:
                e["kind"] = edge_registry.resolve(edge_type=e.get("kind"), provenance="semantic_ai", context={**context, "line": e.get("line")})
                edges.append(e)
            # C. System-Kanten (Struktur: belongs_to, next, prev)
            raw_system_edges = build_edges_for_note(note_id, chunk_pls, note_level_references=note_pl.get("references", []))
            for e in raw_system_edges:
                e["kind"] = edge_registry.resolve(edge_type=e.get("kind", "belongs_to"), provenance="structure", context={**context, "line": "system"})
                if e["kind"]: edges.append(e)
        except Exception as e:
            logger.error(f"Processing failed: {e}", exc_info=True)
@ -278,31 +283,23 @@ class IngestionService:
        # 6. Upsert in Qdrant
        try:
            # Alte Fragmente löschen, um "Geister-Chunks" zu vermeiden
            if purge_before and has_old:
                self._purge_artifacts(note_id)
            # Note Metadaten
            n_name, n_pts = points_for_note(self.prefix, note_pl, None, self.dim)
            upsert_batch(self.client, n_name, n_pts)
            # Chunks (Vektoren)
            if chunk_pls and vecs:
                c_name, c_pts = points_for_chunks(self.prefix, chunk_pls, vecs)
                upsert_batch(self.client, c_name, c_pts)
            # Kanten
            if edges:
                e_name, e_pts = points_for_edges(self.prefix, edges)
                upsert_batch(self.client, e_name, e_pts)
            return {
-                "path": file_path,
+                "path": file_path, "status": "success", "changed": True, "note_id": note_id,
-                "status": "success",
+                "chunks_count": len(chunk_pls), "edges_count": len(edges)
                "changed": True,
                "note_id": note_id,
                "chunks_count": len(chunk_pls),
                "edges_count": len(edges)
            }
        except Exception as e:
            logger.error(f"Upsert failed: {e}", exc_info=True)
@ -319,19 +316,17 @@ class IngestionService:
        except: return None
    def _artifacts_missing(self, note_id: str) -> Tuple[bool, bool]:
-        """Prüft, ob Chunks oder Kanten für eine Note fehlen (Integritätscheck)."""
+        """Prüft, ob Chunks oder Kanten für eine Note fehlen."""
        from qdrant_client.http import models as rest
        c_col = f"{self.prefix}_chunks"
        e_col = f"{self.prefix}_edges"
        try:
            f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
-            c_pts, _ = self.client.scroll(collection_name=c_col, scroll_filter=f, limit=1)
+            c_pts, _ = self.client.scroll(collection_name=f"{self.prefix}_chunks", scroll_filter=f, limit=1)
-            e_pts, _ = self.client.scroll(collection_name=e_col, scroll_filter=f, limit=1)
+            e_pts, _ = self.client.scroll(collection_name=f"{self.prefix}_edges", scroll_filter=f, limit=1)
            return (not bool(c_pts)), (not bool(e_pts))
        except: return True, True
    def _purge_artifacts(self, note_id: str):
-        """Löscht alle Chunks und Edges einer Note (vor dem Neu-Schreiben)."""
+        """Löscht alle Chunks und Edges einer Note."""
        from qdrant_client.http import models as rest
        f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
        selector = rest.FilterSelector(filter=f)
@ -341,7 +336,7 @@ class IngestionService:
            except Exception: pass
    async def create_from_text(self, markdown_content: str, filename: str, vault_root: str, folder: str = "00_Inbox") -> Dict[str, Any]:
-        """Hilfsmethode zur Erstellung einer Note aus einem Textstream (Editor-Save)."""
+        """Hilfsmethode zur Erstellung einer Note aus einem Textstream."""
        target_dir = os.path.join(vault_root, folder)
        os.makedirs(target_dir, exist_ok=True)
        file_path = os.path.join(target_dir, filename)
@ -351,7 +346,6 @@ class IngestionService:
                f.flush()
                os.fsync(f.fileno())
            await asyncio.sleep(0.1) 
            logger.info(f"Written file to {file_path}")
        except Exception as e:
            return {"status": "error", "error": f"Disk write failed: {str(e)}"}
        return await self.process_file(file_path=file_path, vault_root=vault_root, apply=True, force_replace=True, purge_before=True)
--- a/app/routers/ingest.py
+++ b/app/routers/ingest.py
@ -1,8 +1,9 @@
 """
 FILE: app/routers/ingest.py
 DESCRIPTION: Endpunkte für WP-11. Nimmt Markdown entgegen.
-Refactored für WP-14: Nutzt BackgroundTasks für non-blocking Save.
+             Refactored für WP-14: Nutzt BackgroundTasks für non-blocking Save.
-VERSION: 0.7.0 (Fix: Timeout WP-14)
+             Update WP-20: Unterstützung für Hybrid-Cloud-Analyse Feedback.
 VERSION: 0.8.0 (WP-20 Hybrid Ready)
 STATUS: Active
 DEPENDENCIES: app.core.ingestion, app.services.discovery, fastapi, pydantic
 """
@ -44,6 +45,7 @@ class SaveResponse(BaseModel):
 async def run_ingestion_task(markdown_content: str, filename: str, vault_root: str, folder: str):
    """
    Führt die Ingestion im Hintergrund aus, damit der Request nicht blockiert.
    Integrierter WP-20 Hybrid-Modus über den IngestionService.
    """
    logger.info(f"🔄 Background Task started: Ingesting {filename}...")
    try:
@ -80,15 +82,17 @@ async def analyze_draft(req: AnalyzeRequest):
 async def save_note(req: SaveRequest, background_tasks: BackgroundTasks):
    """
    WP-14 Fix: Startet Ingestion im Hintergrund (Fire & Forget).
-    Verhindert Timeouts bei aktiver Smart-Edge-Allocation (WP-15).
+    Verhindert Timeouts bei aktiver Smart-Edge-Allocation (WP-15) und Cloud-Hybrid-Modus (WP-20).
    """
    try:
        vault_root = os.getenv("MINDNET_VAULT_ROOT", "./vault")
        abs_vault_root = os.path.abspath(vault_root)
        if not os.path.exists(abs_vault_root):
-            try: os.makedirs(abs_vault_root, exist_ok=True)
+            try: 
-            except: pass
+                os.makedirs(abs_vault_root, exist_ok=True)
            except Exception as e:
                logger.warning(f"Could not create vault root: {e}")
        final_filename = req.filename or f"draft_{int(time.time())}.md"
@ -109,7 +113,7 @@ async def save_note(req: SaveRequest, background_tasks: BackgroundTasks):
            status="queued",
            file_path=os.path.join(req.folder, final_filename),
            note_id="pending", 
-            message="Speicherung & KI-Analyse im Hintergrund gestartet.",
+            message="Speicherung & Hybrid-KI-Analyse (WP-20) im Hintergrund gestartet.",
            stats={
                "chunks": -1, # Indikator für Async
                "edges": -1
--- a/app/services/llm_service.py
+++ b/app/services/llm_service.py
@ -1,11 +1,11 @@
 """
 FILE: app/services/llm_service.py
-DESCRIPTION: Asynchroner Client für Ollama. Verwaltet Prompts und Background-Last (Semaphore).
+DESCRIPTION: Hybrid-Client für Ollama & Google Gemini. 
-VERSION: 2.8.0
+             Verwaltet Prompts, Background-Last (Semaphore) und Cloud-Routing.
 VERSION: 3.1.0 (WP-20 Full Integration: Provider-Aware Prompting)
 STATUS: Active
-DEPENDENCIES: httpx, yaml, asyncio, app.config
+DEPENDENCIES: httpx, yaml, asyncio, google-generativeai, app.config
 EXTERNAL_CONFIG: config/prompts.yaml
 LAST_ANALYSIS: 2025-12-15
 """
 import httpx
@ -13,47 +13,47 @@ import yaml
 import logging
 import os
 import asyncio
 import json
 import google.generativeai as genai
 from pathlib import Path
 from typing import Optional, Dict, Any, Literal
 from app.config import get_settings
 logger = logging.getLogger(__name__)
 class Settings:
    OLLAMA_URL = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
    LLM_TIMEOUT = float(os.getenv("MINDNET_LLM_TIMEOUT", 300.0))
    LLM_MODEL = os.getenv("MINDNET_LLM_MODEL", "phi3:mini")
    PROMPTS_PATH = os.getenv("MINDNET_PROMPTS_PATH", "./config/prompts.yaml")
    # NEU: Konfigurierbares Limit für Hintergrund-Last
    # Default auf 2 (konservativ), kann in .env erhöht werden.
    BACKGROUND_LIMIT = int(os.getenv("MINDNET_LLM_BACKGROUND_LIMIT", "2"))
 def get_settings():
    return Settings()
 class LLMService:
-    # GLOBALER SEMAPHOR (Lazy Initialization)
+    # GLOBALER SEMAPHOR für Hintergrund-Last Steuerung (WP-06 / WP-20)
    # Wir initialisieren ihn erst, wenn wir die Settings kennen.
    _background_semaphore = None
    def __init__(self):
        self.settings = get_settings()
        self.prompts = self._load_prompts()
-        # Initialisiere Semaphore einmalig auf Klassen-Ebene basierend auf Config
+        # Initialisiere Semaphore einmalig auf Klassen-Ebene
        if LLMService._background_semaphore is None:
-            limit = self.settings.BACKGROUND_LIMIT
+            limit = getattr(self.settings, "BACKGROUND_LIMIT", 2)
            logger.info(f"🚦 LLMService: Initializing Background Semaphore with limit: {limit}")
            LLMService._background_semaphore = asyncio.Semaphore(limit)
        # Ollama Setup
        self.timeout = httpx.Timeout(self.settings.LLM_TIMEOUT, connect=10.0)
-        
+        self.ollama_client = httpx.AsyncClient(
        self.client = httpx.AsyncClient(
            base_url=self.settings.OLLAMA_URL, 
            timeout=self.timeout
        )
        # Gemini Setup [WP-20]
        if hasattr(self.settings, "GOOGLE_API_KEY") and self.settings.GOOGLE_API_KEY:
            genai.configure(api_key=self.settings.GOOGLE_API_KEY)
            model_name = getattr(self.settings, "GEMINI_MODEL", "gemini-1.5-flash")
            self.gemini_model = genai.GenerativeModel(model_name)
            logger.info(f"✨ LLMService: Gemini Cloud Mode active ({model_name})")
        else:
            self.gemini_model = None
            logger.warning("⚠️ LLMService: No GOOGLE_API_KEY found. Gemini mode disabled.")
    def _load_prompts(self) -> dict:
        """Lädt die Prompt-Konfiguration aus der YAML-Datei."""
        path = Path(self.settings.PROMPTS_PATH)
        if not path.exists(): return {}
        try:
@ -62,31 +62,74 @@ class LLMService:
            logger.error(f"Failed to load prompts: {e}")
            return {}
    def get_prompt(self, key: str, provider: str = None) -> str:
        """
        Wählt das Template basierend auf dem Provider aus (WP-20).
        Unterstützt sowohl flache Strings als auch Dictionary-basierte Provider-Zweige.
        """
        active_provider = provider or getattr(self.settings, "MINDNET_LLM_PROVIDER", "ollama")
        data = self.prompts.get(key, "")
        if isinstance(data, dict):
            # Versuche den Provider-Key, Fallback auf 'ollama'
            return data.get(active_provider, data.get("ollama", ""))
        return str(data)
    async def generate_raw_response(
        self, 
        prompt: str, 
        system: str = None, 
        force_json: bool = False,
-        max_retries: int = 0,     
+        max_retries: int = 2,     
        base_delay: float = 2.0,
-        priority: Literal["realtime", "background"] = "realtime"
+        priority: Literal["realtime", "background"] = "realtime",
        provider: Optional[str] = None
    ) -> str:
        """
-        Führt einen LLM Call aus.
+        Führt einen LLM Call aus mit Priority-Handling und Provider-Wahl.
        priority="realtime": Chat (Sofort, keine Bremse).
        priority="background": Import/Analyse (Gedrosselt durch Semaphore).
        """
        # Bestimme Provider: Parameter-Override > Config-Default
        target_provider = provider or getattr(self.settings, "MINDNET_LLM_PROVIDER", "ollama")
        use_semaphore = (priority == "background")
        if use_semaphore and LLMService._background_semaphore:
            async with LLMService._background_semaphore:
-                return await self._execute_request(prompt, system, force_json, max_retries, base_delay)
+                return await self._dispatch_request(target_provider, prompt, system, force_json, max_retries, base_delay)
        else:
-            # Realtime oder Fallback (falls Semaphore Init fehlschlug)
+            return await self._dispatch_request(target_provider, prompt, system, force_json, max_retries, base_delay)
            return await self._execute_request(prompt, system, force_json, max_retries, base_delay)
-    async def _execute_request(self, prompt, system, force_json, max_retries, base_delay):
+    async def _dispatch_request(self, provider, prompt, system, force_json, max_retries, base_delay):
        """Routet die Anfrage an den gewählten Provider mit Fallback-Logik."""
        try:
            if provider == "gemini" and self.gemini_model:
                return await self._execute_gemini(prompt, system, force_json)
            else:
                return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
        except Exception as e:
            # Automatischer Fallback auf Ollama bei Cloud-Fehlern (WP-20)
            if provider == "gemini" and getattr(self.settings, "LLM_FALLBACK_ENABLED", True):
                logger.warning(f"🔄 Gemini failed: {e}. Falling back to Ollama.")
                return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
            raise e
    async def _execute_gemini(self, prompt, system, force_json) -> str:
        """Asynchroner Google Gemini Call (WP-20)."""
        full_prompt = f"System: {system}\n\nUser: {prompt}" if system else prompt
        # Gemini JSON Mode Support
        gen_config = {}
        if force_json:
            gen_config["response_mime_type"] = "application/json"
        response = await self.gemini_model.generate_content_async(
            full_prompt,
            generation_config=gen_config
        )
        return response.text.strip()
    async def _execute_ollama(self, prompt, system, force_json, max_retries, base_delay) -> str:
        """Ollama Call mit exponentieller Backoff-Retry-Logik."""
        payload: Dict[str, Any] = {
            "model": self.settings.LLM_MODEL,
            "prompt": prompt,
@ -96,52 +139,47 @@ class LLMService:
                "num_ctx": 8192     
            }
        }
-
+        if force_json: payload["format"] = "json"
-        if force_json:
+        if system: payload["system"] = system
             payload["format"] = "json"
        if system:
            payload["system"] = system
        attempt = 0
        while True:
            try:
-                response = await self.client.post("/api/generate", json=payload)
+                response = await self.ollama_client.post("/api/generate", json=payload)
                if response.status_code == 200:
-                    data = response.json()
+                    return response.json().get("response", "").strip()
                    return data.get("response", "").strip()
                else:
                response.raise_for_status()
            except Exception as e:
                attempt += 1
                if attempt > max_retries:
-                    logger.error(f"LLM Final Error (Versuch {attempt}): {e}")
+                    logger.error(f"Ollama Error after {attempt} retries: {e}")
                    raise e
-                
+                # Exponentieller Backoff: base_delay * (2 ^ (attempt - 1))
                wait_time = base_delay * (2 ** (attempt - 1))
-                logger.warning(f"⚠️ LLM Retry ({attempt}/{max_retries}) in {wait_time}s: {e}")
+                logger.warning(f"⚠️ Ollama attempt {attempt} failed. Retrying in {wait_time}s...")
                await asyncio.sleep(wait_time)
    async def generate_rag_response(self, query: str, context_str: str) -> str:
-        """
+        """Standard RAG Chat-Interface mit Provider-spezifischen Templates."""
-        Chat-Wrapper: Immer Realtime.
+        provider = getattr(self.settings, "MINDNET_LLM_PROVIDER", "ollama")
-        """
+        
-        system_prompt = self.prompts.get("system_prompt", "")
+        # Holen der Templates über die neue get_prompt Methode
-        rag_template = self.prompts.get("rag_template", "{context_str}\n\n{query}")
+        system_prompt = self.get_prompt("system_prompt", provider)
        rag_template = self.get_prompt("rag_template", provider)
        # Fallback für RAG Template Struktur
        if not rag_template:
            rag_template = "{context_str}\n\n{query}"
        final_prompt = rag_template.format(context_str=context_str, query=query)
        return await self.generate_raw_response(
            final_prompt, 
            system=system_prompt, 
            max_retries=0,
            force_json=False,
            priority="realtime"
        )
    async def close(self):
-        if self.client:
+        """Schließt alle offenen HTTP-Verbindungen."""
-            await self.client.aclose()
+        if self.ollama_client:
            await self.ollama_client.aclose()
--- a/config/decision_engine.yaml
+++ b/config/decision_engine.yaml
@ -1,31 +1,37 @@
 # config/decision_engine.yaml
 # Steuerung der Decision Engine (Intent Recognition & Graph Routing)
-# Version: 2.5.0 (WP-22: Semantic Graph Routing)
+# VERSION: 2.6.0 (WP-20: Hybrid LLM & WP-22: Semantic Graph Routing)
 # STATUS: Active
-version: 2.5
+version: 2.6
 settings:
  llm_fallback_enabled: true
  # Strategie für den Router selbst (Welches Modell erkennt den Intent?)
  # "auto" nutzt den in MINDNET_LLM_PROVIDER gesetzten Standard.
  router_provider: "auto" 
  # Few-Shot Prompting für den LLM-Router (Slow Path)
  # Gemini 1.5 nutzt diesen Kontext für hochpräzise Intent-Erkennung.
  llm_router_prompt: |
-    Du bist ein Klassifikator. Analysiere die Nachricht und wähle die passende Strategie.
+    Du bist der zentrale Intent-Klassifikator für Mindnet, einen digitalen Zwilling.
    Analysiere die Nachricht und wähle die passende Strategie.
    Antworte NUR mit dem Namen der Strategie.
    STRATEGIEN:
    - INTERVIEW: User will Wissen erfassen, Notizen anlegen oder Dinge festhalten.
-    - DECISION: Rat, Strategie, Vor/Nachteile, "Soll ich".
+    - DECISION: Rat, Strategie, Abwägung von Werten, "Soll ich tun X?".
-    - EMPATHY: Gefühle, Frust, Freude, Probleme.
+    - EMPATHY: Gefühle, Reflexion der eigenen Verfassung, Frust, Freude.
-    - CODING: Code, Syntax, Programmierung.
+    - CODING: Code-Erstellung, Debugging, technische Dokumentation.
-    - FACT: Wissen, Fakten, Definitionen.
+    - FACT: Reine Wissensabfrage, Definitionen, Suchen von Informationen.
    BEISPIELE:
-    User: "Wie funktioniert Qdrant?" -> FACT
+    User: "Wie funktioniert die Qdrant-Vektor-DB?" -> FACT
-    User: "Soll ich Qdrant nutzen?" -> DECISION
+    User: "Soll ich mein Startup jetzt verkaufen?" -> DECISION
-    User: "Ich möchte etwas notieren" -> INTERVIEW
+    User: "Notiere mir kurz meine Gedanken zum Meeting." -> INTERVIEW
-    User: "Lass uns das festhalten" -> INTERVIEW
+    User: "Ich fühle mich heute sehr erschöpft." -> EMPATHY
-    User: "Schreibe ein Python Script" -> CODING
+    User: "Schreibe eine FastAPI-Route für den Ingest." -> CODING
    User: "Alles ist grau und sinnlos" -> EMPATHY
    NACHRICHT: "{query}"
@ -35,6 +41,7 @@ strategies:
  # 1. Fakten-Abfrage (Fallback & Default)
  FACT:
    description: "Reine Wissensabfrage."
    preferred_provider: "ollama" # Schnell und lokal ausreichend
    trigger_keywords: [] 
    inject_types: []
    # WP-22: Definitionen & Hierarchien bevorzugen
@ -46,9 +53,10 @@ strategies:
    prompt_template: "rag_template"
    prepend_instruction: null
-  # 2. Entscheidungs-Frage
+  # 2. Entscheidungs-Frage (Power-Strategie)
  DECISION:
    description: "Der User sucht Rat, Strategie oder Abwägung."
    preferred_provider: "gemini" # Nutzt Gemini's Reasoning-Power für WP-20
    trigger_keywords: 
      - "soll ich"
      - "meinung"
@ -68,12 +76,13 @@ strategies:
      impacts: 2.0  # NEU: Zeige mir alles, was von dieser Entscheidung betroffen ist!
    prompt_template: "decision_template"
    prepend_instruction: |
-      !!! ENTSCHEIDUNGS-MODUS !!!
+      !!! ENTSCHEIDUNGS-MODUS (HYBRID AI) !!!
      BITTE WÄGE FAKTEN GEGEN FOLGENDE WERTE, PRINZIPIEN UND ZIELE AB:
-  # 3. Empathie / "Ich"-Modus
+  # 3. Empathie / "Ich"-Modus (Privacy-Fokus)
  EMPATHY:
    description: "Reaktion auf emotionale Zustände."
    preferred_provider: "ollama" # Private Emotionen bleiben lokal!
    trigger_keywords:
      - "ich fühle"
      - "traurig"
@ -96,6 +105,7 @@ strategies:
  # 4. Coding / Technical
  CODING:
    description: "Technische Anfragen und Programmierung."
    preferred_provider: "gemini" # Höheres Weltwissen für moderne Libraries
    trigger_keywords:
      - "code"
      - "python"
@ -116,10 +126,9 @@ strategies:
    prepend_instruction: null
  # 5. Interview / Datenerfassung
    # HINWEIS: Spezifische Typen (Projekt, Ziel etc.) werden automatisch 
  # über die types.yaml erkannt. Hier stehen nur generische Trigger.
  INTERVIEW:
    description: "Der User möchte Wissen erfassen."
    preferred_provider: "ollama" # Lokale Erfassung ist performant genug
    trigger_keywords:
      - "neue notiz"
      - "etwas notieren"
@ -135,8 +144,7 @@ strategies:
    edge_boosts: {}
    prompt_template: "interview_template"
    prepend_instruction: null
-    # Schemas: Hier nur der Fallback. 
+    # Schemas kommen aus types.yaml (WP-22)
    # Spezifische Schemas (Project, Experience) kommen jetzt aus types.yaml!
    schemas:
      default: 
        fields: 
--- a/config/prompts.yaml
+++ b/config/prompts.yaml
@ -15,7 +15,8 @@ system_prompt: |
 # ---------------------------------------------------------
 # 1. STANDARD: Fakten & Wissen (Intent: FACT)
 # ---------------------------------------------------------
-rag_template: |
+rag_template:
  ollama: |
    QUELLEN (WISSEN):
    =========================================
    {context_str}
@ -27,11 +28,16 @@ rag_template: |
    ANWEISUNG:
    Beantworte die Frage präzise basierend auf den Quellen.
    Fasse die Informationen zusammen. Sei objektiv und neutral.
  gemini: |
    Analysiere diesen Kontext meines digitalen Zwillings:
    {context_str}
    Beantworte die Anfrage detailliert und prüfe auf Widersprüche: {query}
 # ---------------------------------------------------------
 # 2. DECISION: Strategie & Abwägung (Intent: DECISION)
 # ---------------------------------------------------------
-decision_template: |
+decision_template:
  ollama: |
    KONTEXT (FAKTEN & STRATEGIE):
    =========================================
    {context_str}
@ -50,11 +56,15 @@ decision_template: |
    - **Analyse:** (Kurze Zusammenfassung der Fakten)
    - **Abgleich:** (Gibt es Konflikte mit Werten/Zielen? Nenne die Quelle!)
    - **Empfehlung:** (Klare Meinung: Ja/Nein/Vielleicht mit Begründung)
  gemini: |
    Agierte als Senior Strategy Consultant. Nutze den Kontext {context_str}, um die Frage {query} 
    tiefgreifend gegen meine langfristigen Ziele abzuwägen.
 # ---------------------------------------------------------
 # 3. EMPATHY: Der Spiegel / "Ich"-Modus (Intent: EMPATHY)
 # ---------------------------------------------------------
-empathy_template: |
+empathy_template:
  ollama: |
    KONTEXT (ERFAHRUNGEN & GLAUBENSSÄTZE):
    =========================================
    {context_str}
@ -75,7 +85,8 @@ empathy_template: |
 # ---------------------------------------------------------
 # 4. TECHNICAL: Der Coder (Intent: CODING)
 # ---------------------------------------------------------
-technical_template: |
+technical_template:
  ollama: |
    KONTEXT (DOCS & SNIPPETS):
    =========================================
    {context_str}
@ -97,7 +108,8 @@ technical_template: |
 # ---------------------------------------------------------
 # 5. INTERVIEW: Der "One-Shot Extractor" (Performance Mode)
 # ---------------------------------------------------------
-interview_template: |
+interview_template:
  ollama: |
    TASK:
    Du bist ein professioneller Ghostwriter. Verwandle den "USER INPUT" in eine strukturierte Notiz vom Typ '{target_type}'.
@ -135,7 +147,8 @@ interview_template: |
 # ---------------------------------------------------------
 # 6. EDGE_ALLOCATION: Kantenfilter (Intent: OFFLINE_FILTER)
 # ---------------------------------------------------------
-edge_allocation_template: |
+edge_allocation_template:
  ollama: |
    TASK:
    Du bist ein strikter Selektor. Du erhältst eine Liste von "Kandidaten-Kanten" (Strings).
    Wähle jene aus, die inhaltlich im "Textabschnitt" vorkommen oder relevant sind.
@ -160,3 +173,9 @@ edge_allocation_template: |
    Output: ["blocks:Projekt Alpha", "needs:Budget"]
    DEIN OUTPUT (JSON):
  gemini: |
    Extrahiere semantische Kanten für den Graphen ({note_id}). 
    Finde auch implizite Verbindungen. 
    JSON: [{"to": "X", "kind": "Y", "reason": "Z"}].
    TEXT: {text}
--- a/requirements.txt
+++ b/requirements.txt
@ -35,3 +35,6 @@ streamlit>=1.39.0
 # Visualization (Parallelbetrieb)
 streamlit-agraph>=0.0.45
 st-cytoscape
 # Google gemini API
 google-generativeai>=0.8.3