WP20 initial
This commit is contained in:
parent
2d43e0596c
commit
234949800b
|
|
@ -1,10 +1,11 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/config.py
|
FILE: app/config.py
|
||||||
DESCRIPTION: Zentrale Pydantic-Konfiguration (Env-Vars für Qdrant, LLM, Retriever).
|
DESCRIPTION: Zentrale Pydantic-Konfiguration (Env-Vars für Qdrant, LLM, Retriever).
|
||||||
VERSION: 0.4.0
|
Erweitert um WP-20 Hybrid-Optionen.
|
||||||
|
VERSION: 0.5.0
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: os, functools, pathlib
|
DEPENDENCIES: os, functools, pathlib
|
||||||
LAST_ANALYSIS: 2025-12-15
|
LAST_ANALYSIS: 2025-12-23
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
import os
|
import os
|
||||||
|
|
@ -12,29 +13,38 @@ from functools import lru_cache
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
class Settings:
|
class Settings:
|
||||||
# Qdrant
|
# Qdrant Verbindung
|
||||||
QDRANT_URL: str = os.getenv("QDRANT_URL", "http://127.0.0.1:6333")
|
QDRANT_URL: str = os.getenv("QDRANT_URL", "http://127.0.0.1:6333")
|
||||||
QDRANT_API_KEY: str | None = os.getenv("QDRANT_API_KEY")
|
QDRANT_API_KEY: str | None = os.getenv("QDRANT_API_KEY")
|
||||||
COLLECTION_PREFIX: str = os.getenv("MINDNET_PREFIX", "mindnet")
|
COLLECTION_PREFIX: str = os.getenv("MINDNET_PREFIX", "mindnet")
|
||||||
VECTOR_SIZE: int = int(os.getenv("MINDNET_VECTOR_SIZE", "384"))
|
VECTOR_SIZE: int = int(os.getenv("MINDNET_VECTOR_SIZE", "384"))
|
||||||
DISTANCE: str = os.getenv("MINDNET_DISTANCE", "Cosine")
|
DISTANCE: str = os.getenv("MINDNET_DISTANCE", "Cosine")
|
||||||
|
|
||||||
# Embeddings
|
# Embeddings (lokal)
|
||||||
MODEL_NAME: str = os.getenv("MINDNET_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
MODEL_NAME: str = os.getenv("MINDNET_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
||||||
|
|
||||||
# WP-05 LLM / Ollama
|
# WP-20 Hybrid LLM Provider
|
||||||
|
# Erlaubt: "ollama" oder "gemini"
|
||||||
|
MINDNET_LLM_PROVIDER: str = os.getenv("MINDNET_LLM_PROVIDER", "ollama").lower()
|
||||||
|
GOOGLE_API_KEY: str | None = os.getenv("GOOGLE_API_KEY")
|
||||||
|
GEMINI_MODEL: str = os.getenv("MINDNET_GEMINI_MODEL", "gemini-1.5-flash")
|
||||||
|
LLM_FALLBACK_ENABLED: bool = os.getenv("MINDNET_LLM_FALLBACK", "true").lower() == "true"
|
||||||
|
|
||||||
|
# WP-05 LLM / Ollama (Local)
|
||||||
OLLAMA_URL: str = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
|
OLLAMA_URL: str = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
|
||||||
LLM_MODEL: str = os.getenv("MINDNET_LLM_MODEL", "phi3:mini")
|
LLM_MODEL: str = os.getenv("MINDNET_LLM_MODEL", "phi3:mini")
|
||||||
PROMPTS_PATH: str = os.getenv("MINDNET_PROMPTS_PATH", "config/prompts.yaml")
|
PROMPTS_PATH: str = os.getenv("MINDNET_PROMPTS_PATH", "config/prompts.yaml")
|
||||||
|
|
||||||
# NEU für WP-06
|
# WP-06 / WP-14 Performance & Timeouts
|
||||||
LLM_TIMEOUT: float = float(os.getenv("MINDNET_LLM_TIMEOUT", "120.0"))
|
LLM_TIMEOUT: float = float(os.getenv("MINDNET_LLM_TIMEOUT", "120.0"))
|
||||||
DECISION_CONFIG_PATH: str = os.getenv("MINDNET_DECISION_CONFIG", "config/decision_engine.yaml")
|
DECISION_CONFIG_PATH: str = os.getenv("MINDNET_DECISION_CONFIG", "config/decision_engine.yaml")
|
||||||
|
BACKGROUND_LIMIT: int = int(os.getenv("MINDNET_LLM_BACKGROUND_LIMIT", "2"))
|
||||||
|
|
||||||
# API
|
# API & Debugging
|
||||||
DEBUG: bool = os.getenv("DEBUG", "false").lower() == "true"
|
DEBUG: bool = os.getenv("DEBUG", "false").lower() == "true"
|
||||||
|
MINDNET_VAULT_ROOT: str = os.getenv("MINDNET_VAULT_ROOT", "./vault")
|
||||||
|
|
||||||
# WP-04 Retriever Defaults
|
# WP-04 Retriever Gewichte (Semantik vs. Graph)
|
||||||
RETRIEVER_W_SEM: float = float(os.getenv("MINDNET_WP04_W_SEM", "0.70"))
|
RETRIEVER_W_SEM: float = float(os.getenv("MINDNET_WP04_W_SEM", "0.70"))
|
||||||
RETRIEVER_W_EDGE: float = float(os.getenv("MINDNET_WP04_W_EDGE", "0.25"))
|
RETRIEVER_W_EDGE: float = float(os.getenv("MINDNET_WP04_W_EDGE", "0.25"))
|
||||||
RETRIEVER_W_CENT: float = float(os.getenv("MINDNET_WP04_W_CENT", "0.05"))
|
RETRIEVER_W_CENT: float = float(os.getenv("MINDNET_WP04_W_CENT", "0.05"))
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,17 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/core/ingestion.py
|
FILE: app/core/ingestion.py
|
||||||
DESCRIPTION: Haupt-Ingestion-Logik. Transformiert Markdown in den Graphen (Notes, Chunks, Edges).
|
DESCRIPTION: Haupt-Ingestion-Logik. Transformiert Markdown in den Graphen (Notes, Chunks, Edges).
|
||||||
FIX: Korrekte Priorisierung von Frontmatter für chunk_profile und retriever_weight.
|
WP-20: Integration von Smart Edge Allocation via Hybrid LLM (Gemini/Ollama).
|
||||||
Lade Chunk-Config basierend auf dem effektiven Profil, nicht nur dem Notiz-Typ.
|
|
||||||
WP-22: Integration von Content Lifecycle (Status Gate) und Edge Registry Validation.
|
WP-22: Integration von Content Lifecycle (Status Gate) und Edge Registry Validation.
|
||||||
|
WP-22: Kontextsensitive Kanten-Validierung mit Fundort-Reporting (Zeilennummern).
|
||||||
WP-22: Multi-Hash Refresh für konsistente Change Detection.
|
WP-22: Multi-Hash Refresh für konsistente Change Detection.
|
||||||
VERSION: 2.8.6 (WP-22 Lifecycle & Registry)
|
VERSION: 2.11.0 (WP-20 Full Integration: Hybrid Smart Edges)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: app.core.parser, app.core.note_payload, app.core.chunker, app.core.derive_edges, app.core.qdrant*, app.services.embeddings_client, app.services.edge_registry
|
DEPENDENCIES: app.core.parser, app.core.note_payload, app.core.chunker, app.services.llm_service, app.services.edge_registry
|
||||||
EXTERNAL_CONFIG: config/types.yaml
|
EXTERNAL_CONFIG: config/types.yaml, config/prompts.yaml
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import asyncio
|
import asyncio
|
||||||
import time
|
import time
|
||||||
|
|
@ -21,6 +22,7 @@ from app.core.parser import (
|
||||||
read_markdown,
|
read_markdown,
|
||||||
normalize_frontmatter,
|
normalize_frontmatter,
|
||||||
validate_required_frontmatter,
|
validate_required_frontmatter,
|
||||||
|
extract_edges_with_context, #
|
||||||
)
|
)
|
||||||
from app.core.note_payload import make_note_payload
|
from app.core.note_payload import make_note_payload
|
||||||
from app.core.chunker import assemble_chunks, get_chunk_config
|
from app.core.chunker import assemble_chunks, get_chunk_config
|
||||||
|
|
@ -42,6 +44,7 @@ from app.core.qdrant_points import (
|
||||||
|
|
||||||
from app.services.embeddings_client import EmbeddingsClient
|
from app.services.embeddings_client import EmbeddingsClient
|
||||||
from app.services.edge_registry import registry as edge_registry
|
from app.services.edge_registry import registry as edge_registry
|
||||||
|
from app.services.llm_service import LLMService #
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -66,18 +69,15 @@ def effective_chunk_profile_name(fm: dict, note_type: str, reg: dict) -> str:
|
||||||
Ermittelt den Namen des zu nutzenden Chunk-Profils.
|
Ermittelt den Namen des zu nutzenden Chunk-Profils.
|
||||||
Priorität: 1. Frontmatter Override -> 2. Type Config -> 3. Global Default
|
Priorität: 1. Frontmatter Override -> 2. Type Config -> 3. Global Default
|
||||||
"""
|
"""
|
||||||
# 1. Frontmatter Override
|
|
||||||
override = fm.get("chunking_profile") or fm.get("chunk_profile")
|
override = fm.get("chunking_profile") or fm.get("chunk_profile")
|
||||||
if override and isinstance(override, str):
|
if override and isinstance(override, str):
|
||||||
return override
|
return override
|
||||||
|
|
||||||
# 2. Type Config
|
|
||||||
t_cfg = reg.get("types", {}).get(note_type, {})
|
t_cfg = reg.get("types", {}).get(note_type, {})
|
||||||
if t_cfg:
|
if t_cfg:
|
||||||
cp = t_cfg.get("chunking_profile") or t_cfg.get("chunk_profile")
|
cp = t_cfg.get("chunking_profile") or t_cfg.get("chunk_profile")
|
||||||
if cp: return cp
|
if cp: return cp
|
||||||
|
|
||||||
# 3. Global Default
|
|
||||||
return reg.get("defaults", {}).get("chunking_profile", "sliding_standard")
|
return reg.get("defaults", {}).get("chunking_profile", "sliding_standard")
|
||||||
|
|
||||||
def effective_retriever_weight(fm: dict, note_type: str, reg: dict) -> float:
|
def effective_retriever_weight(fm: dict, note_type: str, reg: dict) -> float:
|
||||||
|
|
@ -85,34 +85,32 @@ def effective_retriever_weight(fm: dict, note_type: str, reg: dict) -> float:
|
||||||
Ermittelt das effektive retriever_weight für das Scoring.
|
Ermittelt das effektive retriever_weight für das Scoring.
|
||||||
Priorität: 1. Frontmatter Override -> 2. Type Config -> 3. Global Default
|
Priorität: 1. Frontmatter Override -> 2. Type Config -> 3. Global Default
|
||||||
"""
|
"""
|
||||||
# 1. Frontmatter Override
|
|
||||||
override = fm.get("retriever_weight")
|
override = fm.get("retriever_weight")
|
||||||
if override is not None:
|
if override is not None:
|
||||||
try: return float(override)
|
try: return float(override)
|
||||||
except: pass
|
except: pass
|
||||||
|
|
||||||
# 2. Type Config
|
|
||||||
t_cfg = reg.get("types", {}).get(note_type, {})
|
t_cfg = reg.get("types", {}).get(note_type, {})
|
||||||
if t_cfg and "retriever_weight" in t_cfg:
|
if t_cfg and "retriever_weight" in t_cfg:
|
||||||
return float(t_cfg["retriever_weight"])
|
return float(t_cfg["retriever_weight"])
|
||||||
|
|
||||||
# 3. Global Default
|
|
||||||
return float(reg.get("defaults", {}).get("retriever_weight", 1.0))
|
return float(reg.get("defaults", {}).get("retriever_weight", 1.0))
|
||||||
|
|
||||||
|
|
||||||
class IngestionService:
|
class IngestionService:
|
||||||
def __init__(self, collection_prefix: str = None):
|
def __init__(self, collection_prefix: str = None):
|
||||||
env_prefix = os.getenv("COLLECTION_PREFIX", "mindnet")
|
from app.config import get_settings
|
||||||
self.prefix = collection_prefix or env_prefix
|
self.settings = get_settings() #
|
||||||
|
|
||||||
|
self.prefix = collection_prefix or self.settings.COLLECTION_PREFIX
|
||||||
self.cfg = QdrantConfig.from_env()
|
self.cfg = QdrantConfig.from_env()
|
||||||
self.cfg.prefix = self.prefix
|
self.cfg.prefix = self.prefix
|
||||||
self.client = get_client(self.cfg)
|
self.client = get_client(self.cfg)
|
||||||
self.dim = self.cfg.dim
|
self.dim = self.cfg.VECTOR_SIZE #
|
||||||
self.registry = load_type_registry()
|
self.type_registry = load_type_registry()
|
||||||
self.embedder = EmbeddingsClient()
|
self.embedder = EmbeddingsClient()
|
||||||
|
self.llm = LLMService() #
|
||||||
|
|
||||||
# Change Detection Modus (full oder body)
|
|
||||||
self.active_hash_mode = os.getenv("MINDNET_CHANGE_DETECTION_MODE", "full")
|
self.active_hash_mode = os.getenv("MINDNET_CHANGE_DETECTION_MODE", "full")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -122,8 +120,8 @@ class IngestionService:
|
||||||
logger.warning(f"DB init warning: {e}")
|
logger.warning(f"DB init warning: {e}")
|
||||||
|
|
||||||
def _get_chunk_config_by_profile(self, profile_name: str, note_type: str) -> Dict[str, Any]:
|
def _get_chunk_config_by_profile(self, profile_name: str, note_type: str) -> Dict[str, Any]:
|
||||||
"""Holt die Chunker-Parameter (max, target, overlap) für ein spezifisches Profil."""
|
"""Holt die Chunker-Parameter für ein spezifisches Profil."""
|
||||||
profiles = self.registry.get("chunking_profiles", {})
|
profiles = self.type_registry.get("chunking_profiles", {})
|
||||||
if profile_name in profiles:
|
if profile_name in profiles:
|
||||||
cfg = profiles[profile_name].copy()
|
cfg = profiles[profile_name].copy()
|
||||||
if "overlap" in cfg and isinstance(cfg["overlap"], list):
|
if "overlap" in cfg and isinstance(cfg["overlap"], list):
|
||||||
|
|
@ -131,6 +129,43 @@ class IngestionService:
|
||||||
return cfg
|
return cfg
|
||||||
return get_chunk_config(note_type)
|
return get_chunk_config(note_type)
|
||||||
|
|
||||||
|
async def _perform_smart_edge_allocation(self, text: str, note_id: str) -> List[Dict]:
|
||||||
|
"""
|
||||||
|
WP-20: Nutzt den Hybrid LLM Service für die semantische Kanten-Extraktion.
|
||||||
|
Verwendet provider-spezifische Prompts aus der config.
|
||||||
|
"""
|
||||||
|
provider = self.settings.MINDNET_LLM_PROVIDER #
|
||||||
|
|
||||||
|
# Prompt-Lookup (Fallback auf Standard-Struktur falls Key fehlt)
|
||||||
|
prompt_data = self.llm.prompts.get("edge_extraction", {})
|
||||||
|
if isinstance(prompt_data, dict):
|
||||||
|
template = prompt_data.get(provider, prompt_data.get("ollama", ""))
|
||||||
|
else:
|
||||||
|
template = str(prompt_data)
|
||||||
|
|
||||||
|
if not template:
|
||||||
|
template = "Extrahiere semantische Relationen aus: {text}. Antworte als JSON: [{\"to\": \"X\", \"kind\": \"Y\"}]"
|
||||||
|
|
||||||
|
prompt = template.format(text=text[:6000], note_id=note_id)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Nutzt die Semaphore für Hintergrund-Tasks
|
||||||
|
response_json = await self.llm.generate_raw_response(
|
||||||
|
prompt=prompt,
|
||||||
|
priority="background",
|
||||||
|
force_json=True
|
||||||
|
)
|
||||||
|
data = json.loads(response_json)
|
||||||
|
|
||||||
|
# Anreicherung mit Provenance-Metadaten für WP-22 Registry
|
||||||
|
for item in data:
|
||||||
|
item["provenance"] = "semantic_ai"
|
||||||
|
item["line"] = f"ai-{provider}"
|
||||||
|
return data
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Smart Edge Allocation skipped for {note_id}: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
async def process_file(
|
async def process_file(
|
||||||
self,
|
self,
|
||||||
file_path: str,
|
file_path: str,
|
||||||
|
|
@ -142,10 +177,7 @@ class IngestionService:
|
||||||
hash_source: str = "parsed",
|
hash_source: str = "parsed",
|
||||||
hash_normalize: str = "canonical"
|
hash_normalize: str = "canonical"
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""Verarbeitet eine Markdown-Datei und schreibt sie in den Graphen."""
|
||||||
Verarbeitet eine Markdown-Datei und schreibt sie in den Graphen.
|
|
||||||
Folgt dem 14-Schritte-Workflow.
|
|
||||||
"""
|
|
||||||
result = {"path": file_path, "status": "skipped", "changed": False, "error": None}
|
result = {"path": file_path, "status": "skipped", "changed": False, "error": None}
|
||||||
|
|
||||||
# 1. Parse & Frontmatter Validation
|
# 1. Parse & Frontmatter Validation
|
||||||
|
|
@ -158,42 +190,29 @@ class IngestionService:
|
||||||
logger.error(f"Validation failed for {file_path}: {e}")
|
logger.error(f"Validation failed for {file_path}: {e}")
|
||||||
return {**result, "error": f"Validation failed: {str(e)}"}
|
return {**result, "error": f"Validation failed: {str(e)}"}
|
||||||
|
|
||||||
# --- WP-22: Content Lifecycle Gate (Teil A) ---
|
# --- WP-22: Content Lifecycle Gate ---
|
||||||
status = fm.get("status", "draft").lower().strip()
|
status = fm.get("status", "draft").lower().strip()
|
||||||
|
|
||||||
# Hard Skip für System- oder Archiv-Dateien
|
|
||||||
if status in ["system", "template", "archive", "hidden"]:
|
if status in ["system", "template", "archive", "hidden"]:
|
||||||
logger.info(f"Skipping file {file_path} (Status: {status})")
|
logger.info(f"Skipping file {file_path} (Status: {status})")
|
||||||
return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"}
|
return {**result, "status": "skipped", "reason": f"lifecycle_status_{status}"}
|
||||||
|
|
||||||
# 2. Type & Config Resolution
|
# 2. Type & Config Resolution
|
||||||
note_type = resolve_note_type(fm.get("type"), self.registry)
|
note_type = resolve_note_type(fm.get("type"), self.type_registry)
|
||||||
fm["type"] = note_type
|
fm["type"] = note_type
|
||||||
|
|
||||||
effective_profile = effective_chunk_profile_name(fm, note_type, self.registry)
|
effective_profile = effective_chunk_profile_name(fm, note_type, self.type_registry)
|
||||||
effective_weight = effective_retriever_weight(fm, note_type, self.registry)
|
effective_weight = effective_retriever_weight(fm, note_type, self.type_registry)
|
||||||
|
|
||||||
fm["chunk_profile"] = effective_profile
|
fm["chunk_profile"] = effective_profile
|
||||||
fm["retriever_weight"] = effective_weight
|
fm["retriever_weight"] = effective_weight
|
||||||
|
|
||||||
# 3. Build Note Payload (Inkl. Multi-Hash für WP-22)
|
# 3. Build Note Payload (Inkl. Multi-Hash für WP-22)
|
||||||
try:
|
try:
|
||||||
note_pl = make_note_payload(
|
note_pl = make_note_payload(parsed, vault_root=vault_root, hash_normalize=hash_normalize, hash_source=hash_source, file_path=file_path)
|
||||||
parsed,
|
|
||||||
vault_root=vault_root,
|
|
||||||
hash_normalize=hash_normalize,
|
|
||||||
hash_source=hash_source,
|
|
||||||
file_path=file_path
|
|
||||||
)
|
|
||||||
# Text Body Fallback
|
|
||||||
if not note_pl.get("fulltext"): note_pl["fulltext"] = getattr(parsed, "body", "") or ""
|
if not note_pl.get("fulltext"): note_pl["fulltext"] = getattr(parsed, "body", "") or ""
|
||||||
|
|
||||||
# Sicherstellen der effektiven Werte im Payload
|
|
||||||
note_pl["retriever_weight"] = effective_weight
|
note_pl["retriever_weight"] = effective_weight
|
||||||
note_pl["chunk_profile"] = effective_profile
|
note_pl["chunk_profile"] = effective_profile
|
||||||
# WP-22: Status speichern
|
|
||||||
note_pl["status"] = status
|
note_pl["status"] = status
|
||||||
|
|
||||||
note_id = note_pl["note_id"]
|
note_id = note_pl["note_id"]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Payload build failed: {e}")
|
logger.error(f"Payload build failed: {e}")
|
||||||
|
|
@ -205,15 +224,12 @@ class IngestionService:
|
||||||
old_payload = self._fetch_note_payload(note_id)
|
old_payload = self._fetch_note_payload(note_id)
|
||||||
|
|
||||||
has_old = old_payload is not None
|
has_old = old_payload is not None
|
||||||
# Prüfung gegen den aktuell konfigurierten Hash-Modus (body oder full)
|
|
||||||
check_key = f"{self.active_hash_mode}:{hash_source}:{hash_normalize}"
|
check_key = f"{self.active_hash_mode}:{hash_source}:{hash_normalize}"
|
||||||
|
|
||||||
old_hashes = (old_payload or {}).get("hashes")
|
old_hashes = (old_payload or {}).get("hashes", {})
|
||||||
if isinstance(old_hashes, dict): old_hash = old_hashes.get(check_key)
|
old_hash = old_hashes.get(check_key) if isinstance(old_hashes, dict) else None
|
||||||
elif isinstance(old_hashes, str) and self.active_hash_mode == "body": old_hash = old_hashes
|
|
||||||
else: old_hash = None
|
|
||||||
|
|
||||||
new_hash = note_pl.get("hashes", {}).get(check_key)
|
new_hash = note_pl.get("hashes", {}).get(check_key)
|
||||||
|
|
||||||
hash_changed = (old_hash != new_hash)
|
hash_changed = (old_hash != new_hash)
|
||||||
chunks_missing, edges_missing = self._artifacts_missing(note_id)
|
chunks_missing, edges_missing = self._artifacts_missing(note_id)
|
||||||
|
|
||||||
|
|
@ -228,49 +244,38 @@ class IngestionService:
|
||||||
# 5. Processing (Chunking, Embedding, Edge Generation)
|
# 5. Processing (Chunking, Embedding, Edge Generation)
|
||||||
try:
|
try:
|
||||||
body_text = getattr(parsed, "body", "") or ""
|
body_text = getattr(parsed, "body", "") or ""
|
||||||
|
edge_registry.ensure_latest() #
|
||||||
|
|
||||||
# Konfiguration für das spezifische Profil laden
|
|
||||||
chunk_config = self._get_chunk_config_by_profile(effective_profile, note_type)
|
chunk_config = self._get_chunk_config_by_profile(effective_profile, note_type)
|
||||||
|
|
||||||
chunks = await assemble_chunks(fm["id"], body_text, fm["type"], config=chunk_config)
|
chunks = await assemble_chunks(fm["id"], body_text, fm["type"], config=chunk_config)
|
||||||
|
|
||||||
# Chunks mit Metadaten anreichern
|
|
||||||
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
|
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
|
||||||
|
|
||||||
vecs = []
|
vecs = []
|
||||||
if chunk_pls:
|
if chunk_pls:
|
||||||
texts = [c.get("window") or c.get("text") or "" for c in chunk_pls]
|
texts = [c.get("window") or c.get("text") or "" for c in chunk_pls]
|
||||||
try:
|
|
||||||
if hasattr(self.embedder, 'embed_documents'):
|
|
||||||
vecs = await self.embedder.embed_documents(texts)
|
vecs = await self.embedder.embed_documents(texts)
|
||||||
else:
|
|
||||||
for t in texts:
|
|
||||||
v = await self.embedder.embed_query(t)
|
|
||||||
vecs.append(v)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Embedding failed: {e}")
|
|
||||||
raise RuntimeError(f"Embedding failed: {e}")
|
|
||||||
|
|
||||||
# Kanten generieren
|
# --- WP-22: Kanten-Extraktion & Validierung ---
|
||||||
try:
|
|
||||||
raw_edges = build_edges_for_note(
|
|
||||||
note_id,
|
|
||||||
chunk_pls,
|
|
||||||
note_level_references=note_pl.get("references", []),
|
|
||||||
include_note_scope_refs=note_scope_refs
|
|
||||||
)
|
|
||||||
except TypeError:
|
|
||||||
raw_edges = build_edges_for_note(note_id, chunk_pls)
|
|
||||||
|
|
||||||
# --- WP-22: Edge Registry Validation (Teil B) ---
|
|
||||||
edges = []
|
edges = []
|
||||||
if raw_edges:
|
context = {"file": file_path, "note_id": note_id}
|
||||||
for edge in raw_edges:
|
|
||||||
original_kind = edge.get("kind", "related_to")
|
# A. Explizite User-Kanten mit Zeilennummern
|
||||||
# Normalisierung über die Registry (Alias-Auflösung)
|
explicit_edges = extract_edges_with_context(parsed)
|
||||||
canonical_kind = edge_registry.resolve(original_kind)
|
for e in explicit_edges:
|
||||||
edge["kind"] = canonical_kind
|
e["kind"] = edge_registry.resolve(edge_type=e["kind"], provenance="explicit", context={**context, "line": e.get("line")})
|
||||||
edges.append(edge)
|
edges.append(e)
|
||||||
|
|
||||||
|
# B. WP-20: Smart AI Edges (Hybrid Acceleration)
|
||||||
|
ai_edges = await self._perform_smart_edge_allocation(body_text, note_id)
|
||||||
|
for e in ai_edges:
|
||||||
|
e["kind"] = edge_registry.resolve(edge_type=e.get("kind"), provenance="semantic_ai", context={**context, "line": e.get("line")})
|
||||||
|
edges.append(e)
|
||||||
|
|
||||||
|
# C. System-Kanten (Struktur: belongs_to, next, prev)
|
||||||
|
raw_system_edges = build_edges_for_note(note_id, chunk_pls, note_level_references=note_pl.get("references", []))
|
||||||
|
for e in raw_system_edges:
|
||||||
|
e["kind"] = edge_registry.resolve(edge_type=e.get("kind", "belongs_to"), provenance="structure", context={**context, "line": "system"})
|
||||||
|
if e["kind"]: edges.append(e)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Processing failed: {e}", exc_info=True)
|
logger.error(f"Processing failed: {e}", exc_info=True)
|
||||||
|
|
@ -278,31 +283,23 @@ class IngestionService:
|
||||||
|
|
||||||
# 6. Upsert in Qdrant
|
# 6. Upsert in Qdrant
|
||||||
try:
|
try:
|
||||||
# Alte Fragmente löschen, um "Geister-Chunks" zu vermeiden
|
|
||||||
if purge_before and has_old:
|
if purge_before and has_old:
|
||||||
self._purge_artifacts(note_id)
|
self._purge_artifacts(note_id)
|
||||||
|
|
||||||
# Note Metadaten
|
|
||||||
n_name, n_pts = points_for_note(self.prefix, note_pl, None, self.dim)
|
n_name, n_pts = points_for_note(self.prefix, note_pl, None, self.dim)
|
||||||
upsert_batch(self.client, n_name, n_pts)
|
upsert_batch(self.client, n_name, n_pts)
|
||||||
|
|
||||||
# Chunks (Vektoren)
|
|
||||||
if chunk_pls and vecs:
|
if chunk_pls and vecs:
|
||||||
c_name, c_pts = points_for_chunks(self.prefix, chunk_pls, vecs)
|
c_name, c_pts = points_for_chunks(self.prefix, chunk_pls, vecs)
|
||||||
upsert_batch(self.client, c_name, c_pts)
|
upsert_batch(self.client, c_name, c_pts)
|
||||||
|
|
||||||
# Kanten
|
|
||||||
if edges:
|
if edges:
|
||||||
e_name, e_pts = points_for_edges(self.prefix, edges)
|
e_name, e_pts = points_for_edges(self.prefix, edges)
|
||||||
upsert_batch(self.client, e_name, e_pts)
|
upsert_batch(self.client, e_name, e_pts)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"path": file_path,
|
"path": file_path, "status": "success", "changed": True, "note_id": note_id,
|
||||||
"status": "success",
|
"chunks_count": len(chunk_pls), "edges_count": len(edges)
|
||||||
"changed": True,
|
|
||||||
"note_id": note_id,
|
|
||||||
"chunks_count": len(chunk_pls),
|
|
||||||
"edges_count": len(edges)
|
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Upsert failed: {e}", exc_info=True)
|
logger.error(f"Upsert failed: {e}", exc_info=True)
|
||||||
|
|
@ -319,19 +316,17 @@ class IngestionService:
|
||||||
except: return None
|
except: return None
|
||||||
|
|
||||||
def _artifacts_missing(self, note_id: str) -> Tuple[bool, bool]:
|
def _artifacts_missing(self, note_id: str) -> Tuple[bool, bool]:
|
||||||
"""Prüft, ob Chunks oder Kanten für eine Note fehlen (Integritätscheck)."""
|
"""Prüft, ob Chunks oder Kanten für eine Note fehlen."""
|
||||||
from qdrant_client.http import models as rest
|
from qdrant_client.http import models as rest
|
||||||
c_col = f"{self.prefix}_chunks"
|
|
||||||
e_col = f"{self.prefix}_edges"
|
|
||||||
try:
|
try:
|
||||||
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
|
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
|
||||||
c_pts, _ = self.client.scroll(collection_name=c_col, scroll_filter=f, limit=1)
|
c_pts, _ = self.client.scroll(collection_name=f"{self.prefix}_chunks", scroll_filter=f, limit=1)
|
||||||
e_pts, _ = self.client.scroll(collection_name=e_col, scroll_filter=f, limit=1)
|
e_pts, _ = self.client.scroll(collection_name=f"{self.prefix}_edges", scroll_filter=f, limit=1)
|
||||||
return (not bool(c_pts)), (not bool(e_pts))
|
return (not bool(c_pts)), (not bool(e_pts))
|
||||||
except: return True, True
|
except: return True, True
|
||||||
|
|
||||||
def _purge_artifacts(self, note_id: str):
|
def _purge_artifacts(self, note_id: str):
|
||||||
"""Löscht alle Chunks und Edges einer Note (vor dem Neu-Schreiben)."""
|
"""Löscht alle Chunks und Edges einer Note."""
|
||||||
from qdrant_client.http import models as rest
|
from qdrant_client.http import models as rest
|
||||||
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
|
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
|
||||||
selector = rest.FilterSelector(filter=f)
|
selector = rest.FilterSelector(filter=f)
|
||||||
|
|
@ -341,7 +336,7 @@ class IngestionService:
|
||||||
except Exception: pass
|
except Exception: pass
|
||||||
|
|
||||||
async def create_from_text(self, markdown_content: str, filename: str, vault_root: str, folder: str = "00_Inbox") -> Dict[str, Any]:
|
async def create_from_text(self, markdown_content: str, filename: str, vault_root: str, folder: str = "00_Inbox") -> Dict[str, Any]:
|
||||||
"""Hilfsmethode zur Erstellung einer Note aus einem Textstream (Editor-Save)."""
|
"""Hilfsmethode zur Erstellung einer Note aus einem Textstream."""
|
||||||
target_dir = os.path.join(vault_root, folder)
|
target_dir = os.path.join(vault_root, folder)
|
||||||
os.makedirs(target_dir, exist_ok=True)
|
os.makedirs(target_dir, exist_ok=True)
|
||||||
file_path = os.path.join(target_dir, filename)
|
file_path = os.path.join(target_dir, filename)
|
||||||
|
|
@ -351,7 +346,6 @@ class IngestionService:
|
||||||
f.flush()
|
f.flush()
|
||||||
os.fsync(f.fileno())
|
os.fsync(f.fileno())
|
||||||
await asyncio.sleep(0.1)
|
await asyncio.sleep(0.1)
|
||||||
logger.info(f"Written file to {file_path}")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"status": "error", "error": f"Disk write failed: {str(e)}"}
|
return {"status": "error", "error": f"Disk write failed: {str(e)}"}
|
||||||
return await self.process_file(file_path=file_path, vault_root=vault_root, apply=True, force_replace=True, purge_before=True)
|
return await self.process_file(file_path=file_path, vault_root=vault_root, apply=True, force_replace=True, purge_before=True)
|
||||||
|
|
@ -1,8 +1,9 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/routers/ingest.py
|
FILE: app/routers/ingest.py
|
||||||
DESCRIPTION: Endpunkte für WP-11. Nimmt Markdown entgegen.
|
DESCRIPTION: Endpunkte für WP-11. Nimmt Markdown entgegen.
|
||||||
Refactored für WP-14: Nutzt BackgroundTasks für non-blocking Save.
|
Refactored für WP-14: Nutzt BackgroundTasks für non-blocking Save.
|
||||||
VERSION: 0.7.0 (Fix: Timeout WP-14)
|
Update WP-20: Unterstützung für Hybrid-Cloud-Analyse Feedback.
|
||||||
|
VERSION: 0.8.0 (WP-20 Hybrid Ready)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: app.core.ingestion, app.services.discovery, fastapi, pydantic
|
DEPENDENCIES: app.core.ingestion, app.services.discovery, fastapi, pydantic
|
||||||
"""
|
"""
|
||||||
|
|
@ -44,6 +45,7 @@ class SaveResponse(BaseModel):
|
||||||
async def run_ingestion_task(markdown_content: str, filename: str, vault_root: str, folder: str):
|
async def run_ingestion_task(markdown_content: str, filename: str, vault_root: str, folder: str):
|
||||||
"""
|
"""
|
||||||
Führt die Ingestion im Hintergrund aus, damit der Request nicht blockiert.
|
Führt die Ingestion im Hintergrund aus, damit der Request nicht blockiert.
|
||||||
|
Integrierter WP-20 Hybrid-Modus über den IngestionService.
|
||||||
"""
|
"""
|
||||||
logger.info(f"🔄 Background Task started: Ingesting {filename}...")
|
logger.info(f"🔄 Background Task started: Ingesting {filename}...")
|
||||||
try:
|
try:
|
||||||
|
|
@ -80,15 +82,17 @@ async def analyze_draft(req: AnalyzeRequest):
|
||||||
async def save_note(req: SaveRequest, background_tasks: BackgroundTasks):
|
async def save_note(req: SaveRequest, background_tasks: BackgroundTasks):
|
||||||
"""
|
"""
|
||||||
WP-14 Fix: Startet Ingestion im Hintergrund (Fire & Forget).
|
WP-14 Fix: Startet Ingestion im Hintergrund (Fire & Forget).
|
||||||
Verhindert Timeouts bei aktiver Smart-Edge-Allocation (WP-15).
|
Verhindert Timeouts bei aktiver Smart-Edge-Allocation (WP-15) und Cloud-Hybrid-Modus (WP-20).
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
vault_root = os.getenv("MINDNET_VAULT_ROOT", "./vault")
|
vault_root = os.getenv("MINDNET_VAULT_ROOT", "./vault")
|
||||||
abs_vault_root = os.path.abspath(vault_root)
|
abs_vault_root = os.path.abspath(vault_root)
|
||||||
|
|
||||||
if not os.path.exists(abs_vault_root):
|
if not os.path.exists(abs_vault_root):
|
||||||
try: os.makedirs(abs_vault_root, exist_ok=True)
|
try:
|
||||||
except: pass
|
os.makedirs(abs_vault_root, exist_ok=True)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Could not create vault root: {e}")
|
||||||
|
|
||||||
final_filename = req.filename or f"draft_{int(time.time())}.md"
|
final_filename = req.filename or f"draft_{int(time.time())}.md"
|
||||||
|
|
||||||
|
|
@ -109,7 +113,7 @@ async def save_note(req: SaveRequest, background_tasks: BackgroundTasks):
|
||||||
status="queued",
|
status="queued",
|
||||||
file_path=os.path.join(req.folder, final_filename),
|
file_path=os.path.join(req.folder, final_filename),
|
||||||
note_id="pending",
|
note_id="pending",
|
||||||
message="Speicherung & KI-Analyse im Hintergrund gestartet.",
|
message="Speicherung & Hybrid-KI-Analyse (WP-20) im Hintergrund gestartet.",
|
||||||
stats={
|
stats={
|
||||||
"chunks": -1, # Indikator für Async
|
"chunks": -1, # Indikator für Async
|
||||||
"edges": -1
|
"edges": -1
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,11 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/services/llm_service.py
|
FILE: app/services/llm_service.py
|
||||||
DESCRIPTION: Asynchroner Client für Ollama. Verwaltet Prompts und Background-Last (Semaphore).
|
DESCRIPTION: Hybrid-Client für Ollama & Google Gemini.
|
||||||
VERSION: 2.8.0
|
Verwaltet Prompts, Background-Last (Semaphore) und Cloud-Routing.
|
||||||
|
VERSION: 3.1.0 (WP-20 Full Integration: Provider-Aware Prompting)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: httpx, yaml, asyncio, app.config
|
DEPENDENCIES: httpx, yaml, asyncio, google-generativeai, app.config
|
||||||
EXTERNAL_CONFIG: config/prompts.yaml
|
EXTERNAL_CONFIG: config/prompts.yaml
|
||||||
LAST_ANALYSIS: 2025-12-15
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
@ -13,47 +13,47 @@ import yaml
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import json
|
||||||
|
import google.generativeai as genai
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, Dict, Any, Literal
|
from typing import Optional, Dict, Any, Literal
|
||||||
|
from app.config import get_settings
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class Settings:
|
|
||||||
OLLAMA_URL = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
|
|
||||||
LLM_TIMEOUT = float(os.getenv("MINDNET_LLM_TIMEOUT", 300.0))
|
|
||||||
LLM_MODEL = os.getenv("MINDNET_LLM_MODEL", "phi3:mini")
|
|
||||||
PROMPTS_PATH = os.getenv("MINDNET_PROMPTS_PATH", "./config/prompts.yaml")
|
|
||||||
|
|
||||||
# NEU: Konfigurierbares Limit für Hintergrund-Last
|
|
||||||
# Default auf 2 (konservativ), kann in .env erhöht werden.
|
|
||||||
BACKGROUND_LIMIT = int(os.getenv("MINDNET_LLM_BACKGROUND_LIMIT", "2"))
|
|
||||||
|
|
||||||
def get_settings():
|
|
||||||
return Settings()
|
|
||||||
|
|
||||||
class LLMService:
|
class LLMService:
|
||||||
# GLOBALER SEMAPHOR (Lazy Initialization)
|
# GLOBALER SEMAPHOR für Hintergrund-Last Steuerung (WP-06 / WP-20)
|
||||||
# Wir initialisieren ihn erst, wenn wir die Settings kennen.
|
|
||||||
_background_semaphore = None
|
_background_semaphore = None
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.settings = get_settings()
|
self.settings = get_settings()
|
||||||
self.prompts = self._load_prompts()
|
self.prompts = self._load_prompts()
|
||||||
|
|
||||||
# Initialisiere Semaphore einmalig auf Klassen-Ebene basierend auf Config
|
# Initialisiere Semaphore einmalig auf Klassen-Ebene
|
||||||
if LLMService._background_semaphore is None:
|
if LLMService._background_semaphore is None:
|
||||||
limit = self.settings.BACKGROUND_LIMIT
|
limit = getattr(self.settings, "BACKGROUND_LIMIT", 2)
|
||||||
logger.info(f"🚦 LLMService: Initializing Background Semaphore with limit: {limit}")
|
logger.info(f"🚦 LLMService: Initializing Background Semaphore with limit: {limit}")
|
||||||
LLMService._background_semaphore = asyncio.Semaphore(limit)
|
LLMService._background_semaphore = asyncio.Semaphore(limit)
|
||||||
|
|
||||||
|
# Ollama Setup
|
||||||
self.timeout = httpx.Timeout(self.settings.LLM_TIMEOUT, connect=10.0)
|
self.timeout = httpx.Timeout(self.settings.LLM_TIMEOUT, connect=10.0)
|
||||||
|
self.ollama_client = httpx.AsyncClient(
|
||||||
self.client = httpx.AsyncClient(
|
|
||||||
base_url=self.settings.OLLAMA_URL,
|
base_url=self.settings.OLLAMA_URL,
|
||||||
timeout=self.timeout
|
timeout=self.timeout
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Gemini Setup [WP-20]
|
||||||
|
if hasattr(self.settings, "GOOGLE_API_KEY") and self.settings.GOOGLE_API_KEY:
|
||||||
|
genai.configure(api_key=self.settings.GOOGLE_API_KEY)
|
||||||
|
model_name = getattr(self.settings, "GEMINI_MODEL", "gemini-1.5-flash")
|
||||||
|
self.gemini_model = genai.GenerativeModel(model_name)
|
||||||
|
logger.info(f"✨ LLMService: Gemini Cloud Mode active ({model_name})")
|
||||||
|
else:
|
||||||
|
self.gemini_model = None
|
||||||
|
logger.warning("⚠️ LLMService: No GOOGLE_API_KEY found. Gemini mode disabled.")
|
||||||
|
|
||||||
def _load_prompts(self) -> dict:
|
def _load_prompts(self) -> dict:
|
||||||
|
"""Lädt die Prompt-Konfiguration aus der YAML-Datei."""
|
||||||
path = Path(self.settings.PROMPTS_PATH)
|
path = Path(self.settings.PROMPTS_PATH)
|
||||||
if not path.exists(): return {}
|
if not path.exists(): return {}
|
||||||
try:
|
try:
|
||||||
|
|
@ -62,31 +62,74 @@ class LLMService:
|
||||||
logger.error(f"Failed to load prompts: {e}")
|
logger.error(f"Failed to load prompts: {e}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
def get_prompt(self, key: str, provider: str = None) -> str:
|
||||||
|
"""
|
||||||
|
Wählt das Template basierend auf dem Provider aus (WP-20).
|
||||||
|
Unterstützt sowohl flache Strings als auch Dictionary-basierte Provider-Zweige.
|
||||||
|
"""
|
||||||
|
active_provider = provider or getattr(self.settings, "MINDNET_LLM_PROVIDER", "ollama")
|
||||||
|
data = self.prompts.get(key, "")
|
||||||
|
|
||||||
|
if isinstance(data, dict):
|
||||||
|
# Versuche den Provider-Key, Fallback auf 'ollama'
|
||||||
|
return data.get(active_provider, data.get("ollama", ""))
|
||||||
|
return str(data)
|
||||||
|
|
||||||
async def generate_raw_response(
|
async def generate_raw_response(
|
||||||
self,
|
self,
|
||||||
prompt: str,
|
prompt: str,
|
||||||
system: str = None,
|
system: str = None,
|
||||||
force_json: bool = False,
|
force_json: bool = False,
|
||||||
max_retries: int = 0,
|
max_retries: int = 2,
|
||||||
base_delay: float = 2.0,
|
base_delay: float = 2.0,
|
||||||
priority: Literal["realtime", "background"] = "realtime"
|
priority: Literal["realtime", "background"] = "realtime",
|
||||||
|
provider: Optional[str] = None
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Führt einen LLM Call aus.
|
Führt einen LLM Call aus mit Priority-Handling und Provider-Wahl.
|
||||||
priority="realtime": Chat (Sofort, keine Bremse).
|
|
||||||
priority="background": Import/Analyse (Gedrosselt durch Semaphore).
|
|
||||||
"""
|
"""
|
||||||
|
# Bestimme Provider: Parameter-Override > Config-Default
|
||||||
|
target_provider = provider or getattr(self.settings, "MINDNET_LLM_PROVIDER", "ollama")
|
||||||
|
|
||||||
use_semaphore = (priority == "background")
|
use_semaphore = (priority == "background")
|
||||||
|
|
||||||
if use_semaphore and LLMService._background_semaphore:
|
if use_semaphore and LLMService._background_semaphore:
|
||||||
async with LLMService._background_semaphore:
|
async with LLMService._background_semaphore:
|
||||||
return await self._execute_request(prompt, system, force_json, max_retries, base_delay)
|
return await self._dispatch_request(target_provider, prompt, system, force_json, max_retries, base_delay)
|
||||||
else:
|
else:
|
||||||
# Realtime oder Fallback (falls Semaphore Init fehlschlug)
|
return await self._dispatch_request(target_provider, prompt, system, force_json, max_retries, base_delay)
|
||||||
return await self._execute_request(prompt, system, force_json, max_retries, base_delay)
|
|
||||||
|
|
||||||
async def _execute_request(self, prompt, system, force_json, max_retries, base_delay):
|
async def _dispatch_request(self, provider, prompt, system, force_json, max_retries, base_delay):
|
||||||
|
"""Routet die Anfrage an den gewählten Provider mit Fallback-Logik."""
|
||||||
|
try:
|
||||||
|
if provider == "gemini" and self.gemini_model:
|
||||||
|
return await self._execute_gemini(prompt, system, force_json)
|
||||||
|
else:
|
||||||
|
return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
|
||||||
|
except Exception as e:
|
||||||
|
# Automatischer Fallback auf Ollama bei Cloud-Fehlern (WP-20)
|
||||||
|
if provider == "gemini" and getattr(self.settings, "LLM_FALLBACK_ENABLED", True):
|
||||||
|
logger.warning(f"🔄 Gemini failed: {e}. Falling back to Ollama.")
|
||||||
|
return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
async def _execute_gemini(self, prompt, system, force_json) -> str:
|
||||||
|
"""Asynchroner Google Gemini Call (WP-20)."""
|
||||||
|
full_prompt = f"System: {system}\n\nUser: {prompt}" if system else prompt
|
||||||
|
|
||||||
|
# Gemini JSON Mode Support
|
||||||
|
gen_config = {}
|
||||||
|
if force_json:
|
||||||
|
gen_config["response_mime_type"] = "application/json"
|
||||||
|
|
||||||
|
response = await self.gemini_model.generate_content_async(
|
||||||
|
full_prompt,
|
||||||
|
generation_config=gen_config
|
||||||
|
)
|
||||||
|
return response.text.strip()
|
||||||
|
|
||||||
|
async def _execute_ollama(self, prompt, system, force_json, max_retries, base_delay) -> str:
|
||||||
|
"""Ollama Call mit exponentieller Backoff-Retry-Logik."""
|
||||||
payload: Dict[str, Any] = {
|
payload: Dict[str, Any] = {
|
||||||
"model": self.settings.LLM_MODEL,
|
"model": self.settings.LLM_MODEL,
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
|
|
@ -96,52 +139,47 @@ class LLMService:
|
||||||
"num_ctx": 8192
|
"num_ctx": 8192
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if force_json: payload["format"] = "json"
|
||||||
if force_json:
|
if system: payload["system"] = system
|
||||||
payload["format"] = "json"
|
|
||||||
|
|
||||||
if system:
|
|
||||||
payload["system"] = system
|
|
||||||
|
|
||||||
attempt = 0
|
attempt = 0
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
response = await self.client.post("/api/generate", json=payload)
|
response = await self.ollama_client.post("/api/generate", json=payload)
|
||||||
|
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
data = response.json()
|
return response.json().get("response", "").strip()
|
||||||
return data.get("response", "").strip()
|
|
||||||
else:
|
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
attempt += 1
|
attempt += 1
|
||||||
if attempt > max_retries:
|
if attempt > max_retries:
|
||||||
logger.error(f"LLM Final Error (Versuch {attempt}): {e}")
|
logger.error(f"Ollama Error after {attempt} retries: {e}")
|
||||||
raise e
|
raise e
|
||||||
|
# Exponentieller Backoff: base_delay * (2 ^ (attempt - 1))
|
||||||
wait_time = base_delay * (2 ** (attempt - 1))
|
wait_time = base_delay * (2 ** (attempt - 1))
|
||||||
logger.warning(f"⚠️ LLM Retry ({attempt}/{max_retries}) in {wait_time}s: {e}")
|
logger.warning(f"⚠️ Ollama attempt {attempt} failed. Retrying in {wait_time}s...")
|
||||||
await asyncio.sleep(wait_time)
|
await asyncio.sleep(wait_time)
|
||||||
|
|
||||||
async def generate_rag_response(self, query: str, context_str: str) -> str:
|
async def generate_rag_response(self, query: str, context_str: str) -> str:
|
||||||
"""
|
"""Standard RAG Chat-Interface mit Provider-spezifischen Templates."""
|
||||||
Chat-Wrapper: Immer Realtime.
|
provider = getattr(self.settings, "MINDNET_LLM_PROVIDER", "ollama")
|
||||||
"""
|
|
||||||
system_prompt = self.prompts.get("system_prompt", "")
|
# Holen der Templates über die neue get_prompt Methode
|
||||||
rag_template = self.prompts.get("rag_template", "{context_str}\n\n{query}")
|
system_prompt = self.get_prompt("system_prompt", provider)
|
||||||
|
rag_template = self.get_prompt("rag_template", provider)
|
||||||
|
|
||||||
|
# Fallback für RAG Template Struktur
|
||||||
|
if not rag_template:
|
||||||
|
rag_template = "{context_str}\n\n{query}"
|
||||||
|
|
||||||
final_prompt = rag_template.format(context_str=context_str, query=query)
|
final_prompt = rag_template.format(context_str=context_str, query=query)
|
||||||
|
|
||||||
return await self.generate_raw_response(
|
return await self.generate_raw_response(
|
||||||
final_prompt,
|
final_prompt,
|
||||||
system=system_prompt,
|
system=system_prompt,
|
||||||
max_retries=0,
|
|
||||||
force_json=False,
|
|
||||||
priority="realtime"
|
priority="realtime"
|
||||||
)
|
)
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
if self.client:
|
"""Schließt alle offenen HTTP-Verbindungen."""
|
||||||
await self.client.aclose()
|
if self.ollama_client:
|
||||||
|
await self.ollama_client.aclose()
|
||||||
|
|
@ -1,31 +1,37 @@
|
||||||
# config/decision_engine.yaml
|
# config/decision_engine.yaml
|
||||||
# Steuerung der Decision Engine (Intent Recognition & Graph Routing)
|
# Steuerung der Decision Engine (Intent Recognition & Graph Routing)
|
||||||
# Version: 2.5.0 (WP-22: Semantic Graph Routing)
|
# VERSION: 2.6.0 (WP-20: Hybrid LLM & WP-22: Semantic Graph Routing)
|
||||||
|
# STATUS: Active
|
||||||
|
|
||||||
version: 2.5
|
version: 2.6
|
||||||
|
|
||||||
settings:
|
settings:
|
||||||
llm_fallback_enabled: true
|
llm_fallback_enabled: true
|
||||||
|
|
||||||
|
# Strategie für den Router selbst (Welches Modell erkennt den Intent?)
|
||||||
|
# "auto" nutzt den in MINDNET_LLM_PROVIDER gesetzten Standard.
|
||||||
|
router_provider: "auto"
|
||||||
|
|
||||||
# Few-Shot Prompting für den LLM-Router (Slow Path)
|
# Few-Shot Prompting für den LLM-Router (Slow Path)
|
||||||
|
# Gemini 1.5 nutzt diesen Kontext für hochpräzise Intent-Erkennung.
|
||||||
llm_router_prompt: |
|
llm_router_prompt: |
|
||||||
Du bist ein Klassifikator. Analysiere die Nachricht und wähle die passende Strategie.
|
Du bist der zentrale Intent-Klassifikator für Mindnet, einen digitalen Zwilling.
|
||||||
|
Analysiere die Nachricht und wähle die passende Strategie.
|
||||||
Antworte NUR mit dem Namen der Strategie.
|
Antworte NUR mit dem Namen der Strategie.
|
||||||
|
|
||||||
STRATEGIEN:
|
STRATEGIEN:
|
||||||
- INTERVIEW: User will Wissen erfassen, Notizen anlegen oder Dinge festhalten.
|
- INTERVIEW: User will Wissen erfassen, Notizen anlegen oder Dinge festhalten.
|
||||||
- DECISION: Rat, Strategie, Vor/Nachteile, "Soll ich".
|
- DECISION: Rat, Strategie, Abwägung von Werten, "Soll ich tun X?".
|
||||||
- EMPATHY: Gefühle, Frust, Freude, Probleme.
|
- EMPATHY: Gefühle, Reflexion der eigenen Verfassung, Frust, Freude.
|
||||||
- CODING: Code, Syntax, Programmierung.
|
- CODING: Code-Erstellung, Debugging, technische Dokumentation.
|
||||||
- FACT: Wissen, Fakten, Definitionen.
|
- FACT: Reine Wissensabfrage, Definitionen, Suchen von Informationen.
|
||||||
|
|
||||||
BEISPIELE:
|
BEISPIELE:
|
||||||
User: "Wie funktioniert Qdrant?" -> FACT
|
User: "Wie funktioniert die Qdrant-Vektor-DB?" -> FACT
|
||||||
User: "Soll ich Qdrant nutzen?" -> DECISION
|
User: "Soll ich mein Startup jetzt verkaufen?" -> DECISION
|
||||||
User: "Ich möchte etwas notieren" -> INTERVIEW
|
User: "Notiere mir kurz meine Gedanken zum Meeting." -> INTERVIEW
|
||||||
User: "Lass uns das festhalten" -> INTERVIEW
|
User: "Ich fühle mich heute sehr erschöpft." -> EMPATHY
|
||||||
User: "Schreibe ein Python Script" -> CODING
|
User: "Schreibe eine FastAPI-Route für den Ingest." -> CODING
|
||||||
User: "Alles ist grau und sinnlos" -> EMPATHY
|
|
||||||
|
|
||||||
NACHRICHT: "{query}"
|
NACHRICHT: "{query}"
|
||||||
|
|
||||||
|
|
@ -35,6 +41,7 @@ strategies:
|
||||||
# 1. Fakten-Abfrage (Fallback & Default)
|
# 1. Fakten-Abfrage (Fallback & Default)
|
||||||
FACT:
|
FACT:
|
||||||
description: "Reine Wissensabfrage."
|
description: "Reine Wissensabfrage."
|
||||||
|
preferred_provider: "ollama" # Schnell und lokal ausreichend
|
||||||
trigger_keywords: []
|
trigger_keywords: []
|
||||||
inject_types: []
|
inject_types: []
|
||||||
# WP-22: Definitionen & Hierarchien bevorzugen
|
# WP-22: Definitionen & Hierarchien bevorzugen
|
||||||
|
|
@ -46,9 +53,10 @@ strategies:
|
||||||
prompt_template: "rag_template"
|
prompt_template: "rag_template"
|
||||||
prepend_instruction: null
|
prepend_instruction: null
|
||||||
|
|
||||||
# 2. Entscheidungs-Frage
|
# 2. Entscheidungs-Frage (Power-Strategie)
|
||||||
DECISION:
|
DECISION:
|
||||||
description: "Der User sucht Rat, Strategie oder Abwägung."
|
description: "Der User sucht Rat, Strategie oder Abwägung."
|
||||||
|
preferred_provider: "gemini" # Nutzt Gemini's Reasoning-Power für WP-20
|
||||||
trigger_keywords:
|
trigger_keywords:
|
||||||
- "soll ich"
|
- "soll ich"
|
||||||
- "meinung"
|
- "meinung"
|
||||||
|
|
@ -68,12 +76,13 @@ strategies:
|
||||||
impacts: 2.0 # NEU: Zeige mir alles, was von dieser Entscheidung betroffen ist!
|
impacts: 2.0 # NEU: Zeige mir alles, was von dieser Entscheidung betroffen ist!
|
||||||
prompt_template: "decision_template"
|
prompt_template: "decision_template"
|
||||||
prepend_instruction: |
|
prepend_instruction: |
|
||||||
!!! ENTSCHEIDUNGS-MODUS !!!
|
!!! ENTSCHEIDUNGS-MODUS (HYBRID AI) !!!
|
||||||
BITTE WÄGE FAKTEN GEGEN FOLGENDE WERTE, PRINZIPIEN UND ZIELE AB:
|
BITTE WÄGE FAKTEN GEGEN FOLGENDE WERTE, PRINZIPIEN UND ZIELE AB:
|
||||||
|
|
||||||
# 3. Empathie / "Ich"-Modus
|
# 3. Empathie / "Ich"-Modus (Privacy-Fokus)
|
||||||
EMPATHY:
|
EMPATHY:
|
||||||
description: "Reaktion auf emotionale Zustände."
|
description: "Reaktion auf emotionale Zustände."
|
||||||
|
preferred_provider: "ollama" # Private Emotionen bleiben lokal!
|
||||||
trigger_keywords:
|
trigger_keywords:
|
||||||
- "ich fühle"
|
- "ich fühle"
|
||||||
- "traurig"
|
- "traurig"
|
||||||
|
|
@ -96,6 +105,7 @@ strategies:
|
||||||
# 4. Coding / Technical
|
# 4. Coding / Technical
|
||||||
CODING:
|
CODING:
|
||||||
description: "Technische Anfragen und Programmierung."
|
description: "Technische Anfragen und Programmierung."
|
||||||
|
preferred_provider: "gemini" # Höheres Weltwissen für moderne Libraries
|
||||||
trigger_keywords:
|
trigger_keywords:
|
||||||
- "code"
|
- "code"
|
||||||
- "python"
|
- "python"
|
||||||
|
|
@ -116,10 +126,9 @@ strategies:
|
||||||
prepend_instruction: null
|
prepend_instruction: null
|
||||||
|
|
||||||
# 5. Interview / Datenerfassung
|
# 5. Interview / Datenerfassung
|
||||||
# HINWEIS: Spezifische Typen (Projekt, Ziel etc.) werden automatisch
|
|
||||||
# über die types.yaml erkannt. Hier stehen nur generische Trigger.
|
|
||||||
INTERVIEW:
|
INTERVIEW:
|
||||||
description: "Der User möchte Wissen erfassen."
|
description: "Der User möchte Wissen erfassen."
|
||||||
|
preferred_provider: "ollama" # Lokale Erfassung ist performant genug
|
||||||
trigger_keywords:
|
trigger_keywords:
|
||||||
- "neue notiz"
|
- "neue notiz"
|
||||||
- "etwas notieren"
|
- "etwas notieren"
|
||||||
|
|
@ -135,8 +144,7 @@ strategies:
|
||||||
edge_boosts: {}
|
edge_boosts: {}
|
||||||
prompt_template: "interview_template"
|
prompt_template: "interview_template"
|
||||||
prepend_instruction: null
|
prepend_instruction: null
|
||||||
# Schemas: Hier nur der Fallback.
|
# Schemas kommen aus types.yaml (WP-22)
|
||||||
# Spezifische Schemas (Project, Experience) kommen jetzt aus types.yaml!
|
|
||||||
schemas:
|
schemas:
|
||||||
default:
|
default:
|
||||||
fields:
|
fields:
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,8 @@ system_prompt: |
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# 1. STANDARD: Fakten & Wissen (Intent: FACT)
|
# 1. STANDARD: Fakten & Wissen (Intent: FACT)
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
rag_template: |
|
rag_template:
|
||||||
|
ollama: |
|
||||||
QUELLEN (WISSEN):
|
QUELLEN (WISSEN):
|
||||||
=========================================
|
=========================================
|
||||||
{context_str}
|
{context_str}
|
||||||
|
|
@ -27,11 +28,16 @@ rag_template: |
|
||||||
ANWEISUNG:
|
ANWEISUNG:
|
||||||
Beantworte die Frage präzise basierend auf den Quellen.
|
Beantworte die Frage präzise basierend auf den Quellen.
|
||||||
Fasse die Informationen zusammen. Sei objektiv und neutral.
|
Fasse die Informationen zusammen. Sei objektiv und neutral.
|
||||||
|
gemini: |
|
||||||
|
Analysiere diesen Kontext meines digitalen Zwillings:
|
||||||
|
{context_str}
|
||||||
|
Beantworte die Anfrage detailliert und prüfe auf Widersprüche: {query}
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# 2. DECISION: Strategie & Abwägung (Intent: DECISION)
|
# 2. DECISION: Strategie & Abwägung (Intent: DECISION)
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
decision_template: |
|
decision_template:
|
||||||
|
ollama: |
|
||||||
KONTEXT (FAKTEN & STRATEGIE):
|
KONTEXT (FAKTEN & STRATEGIE):
|
||||||
=========================================
|
=========================================
|
||||||
{context_str}
|
{context_str}
|
||||||
|
|
@ -50,11 +56,15 @@ decision_template: |
|
||||||
- **Analyse:** (Kurze Zusammenfassung der Fakten)
|
- **Analyse:** (Kurze Zusammenfassung der Fakten)
|
||||||
- **Abgleich:** (Gibt es Konflikte mit Werten/Zielen? Nenne die Quelle!)
|
- **Abgleich:** (Gibt es Konflikte mit Werten/Zielen? Nenne die Quelle!)
|
||||||
- **Empfehlung:** (Klare Meinung: Ja/Nein/Vielleicht mit Begründung)
|
- **Empfehlung:** (Klare Meinung: Ja/Nein/Vielleicht mit Begründung)
|
||||||
|
gemini: |
|
||||||
|
Agierte als Senior Strategy Consultant. Nutze den Kontext {context_str}, um die Frage {query}
|
||||||
|
tiefgreifend gegen meine langfristigen Ziele abzuwägen.
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# 3. EMPATHY: Der Spiegel / "Ich"-Modus (Intent: EMPATHY)
|
# 3. EMPATHY: Der Spiegel / "Ich"-Modus (Intent: EMPATHY)
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
empathy_template: |
|
empathy_template:
|
||||||
|
ollama: |
|
||||||
KONTEXT (ERFAHRUNGEN & GLAUBENSSÄTZE):
|
KONTEXT (ERFAHRUNGEN & GLAUBENSSÄTZE):
|
||||||
=========================================
|
=========================================
|
||||||
{context_str}
|
{context_str}
|
||||||
|
|
@ -75,7 +85,8 @@ empathy_template: |
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# 4. TECHNICAL: Der Coder (Intent: CODING)
|
# 4. TECHNICAL: Der Coder (Intent: CODING)
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
technical_template: |
|
technical_template:
|
||||||
|
ollama: |
|
||||||
KONTEXT (DOCS & SNIPPETS):
|
KONTEXT (DOCS & SNIPPETS):
|
||||||
=========================================
|
=========================================
|
||||||
{context_str}
|
{context_str}
|
||||||
|
|
@ -97,7 +108,8 @@ technical_template: |
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# 5. INTERVIEW: Der "One-Shot Extractor" (Performance Mode)
|
# 5. INTERVIEW: Der "One-Shot Extractor" (Performance Mode)
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
interview_template: |
|
interview_template:
|
||||||
|
ollama: |
|
||||||
TASK:
|
TASK:
|
||||||
Du bist ein professioneller Ghostwriter. Verwandle den "USER INPUT" in eine strukturierte Notiz vom Typ '{target_type}'.
|
Du bist ein professioneller Ghostwriter. Verwandle den "USER INPUT" in eine strukturierte Notiz vom Typ '{target_type}'.
|
||||||
|
|
||||||
|
|
@ -135,7 +147,8 @@ interview_template: |
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# 6. EDGE_ALLOCATION: Kantenfilter (Intent: OFFLINE_FILTER)
|
# 6. EDGE_ALLOCATION: Kantenfilter (Intent: OFFLINE_FILTER)
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
edge_allocation_template: |
|
edge_allocation_template:
|
||||||
|
ollama: |
|
||||||
TASK:
|
TASK:
|
||||||
Du bist ein strikter Selektor. Du erhältst eine Liste von "Kandidaten-Kanten" (Strings).
|
Du bist ein strikter Selektor. Du erhältst eine Liste von "Kandidaten-Kanten" (Strings).
|
||||||
Wähle jene aus, die inhaltlich im "Textabschnitt" vorkommen oder relevant sind.
|
Wähle jene aus, die inhaltlich im "Textabschnitt" vorkommen oder relevant sind.
|
||||||
|
|
@ -160,3 +173,9 @@ edge_allocation_template: |
|
||||||
Output: ["blocks:Projekt Alpha", "needs:Budget"]
|
Output: ["blocks:Projekt Alpha", "needs:Budget"]
|
||||||
|
|
||||||
DEIN OUTPUT (JSON):
|
DEIN OUTPUT (JSON):
|
||||||
|
|
||||||
|
gemini: |
|
||||||
|
Extrahiere semantische Kanten für den Graphen ({note_id}).
|
||||||
|
Finde auch implizite Verbindungen.
|
||||||
|
JSON: [{"to": "X", "kind": "Y", "reason": "Z"}].
|
||||||
|
TEXT: {text}
|
||||||
|
|
@ -35,3 +35,6 @@ streamlit>=1.39.0
|
||||||
# Visualization (Parallelbetrieb)
|
# Visualization (Parallelbetrieb)
|
||||||
streamlit-agraph>=0.0.45
|
streamlit-agraph>=0.0.45
|
||||||
st-cytoscape
|
st-cytoscape
|
||||||
|
|
||||||
|
# Google gemini API
|
||||||
|
google-generativeai>=0.8.3
|
||||||
Loading…
Reference in New Issue
Block a user