komplette openrouter integration
This commit is contained in:
parent
c60aba63a4
commit
36fb27edf0
|
|
@ -1,8 +1,8 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/config.py
|
FILE: app/config.py
|
||||||
DESCRIPTION: Zentrale Pydantic-Konfiguration. Enthält alle Parameter für Qdrant,
|
DESCRIPTION: Zentrale Pydantic-Konfiguration. Enthält Parameter für Qdrant,
|
||||||
lokale Embeddings, Ollama, Google GenAI und OpenRouter.
|
Embeddings, Ollama, Google GenAI und OpenRouter.
|
||||||
VERSION: 0.6.0 (WP-20 Full Hybrid Integration)
|
VERSION: 0.6.0 (WP-20 Hybrid & OpenRouter Integration)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@ -21,16 +21,16 @@ class Settings:
|
||||||
# --- Lokale Embeddings ---
|
# --- Lokale Embeddings ---
|
||||||
MODEL_NAME: str = os.getenv("MINDNET_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
MODEL_NAME: str = os.getenv("MINDNET_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
||||||
|
|
||||||
# --- WP-20 Cloud Hybrid Mode (Google GenAI & OpenRouter) ---
|
# --- WP-20 Hybrid LLM Provider ---
|
||||||
# Erlaubt: "ollama" | "gemini" | "openrouter"
|
# Optionen: "ollama" | "gemini" | "openrouter"
|
||||||
MINDNET_LLM_PROVIDER: str = os.getenv("MINDNET_LLM_PROVIDER", "ollama").lower()
|
MINDNET_LLM_PROVIDER: str = os.getenv("MINDNET_LLM_PROVIDER", "ollama").lower()
|
||||||
|
|
||||||
# Google AI Studio (Direkt)
|
# Google AI Studio
|
||||||
GOOGLE_API_KEY: str | None = os.getenv("GOOGLE_API_KEY")
|
GOOGLE_API_KEY: str | None = os.getenv("GOOGLE_API_KEY")
|
||||||
GEMINI_MODEL: str = os.getenv("MINDNET_GEMINI_MODEL", "gemini-1.5-flash")
|
GEMINI_MODEL: str = os.getenv("MINDNET_GEMINI_MODEL", "gemini-1.5-flash")
|
||||||
GEMMA_MODEL: str = os.getenv("MINDNET_GEMMA_MODEL", "gemma2-9b-it") # Für Ingestion-Speed
|
GEMMA_MODEL: str = os.getenv("MINDNET_GEMMA_MODEL", "gemma2-9b-it")
|
||||||
|
|
||||||
# OpenRouter Integration
|
# OpenRouter
|
||||||
OPENROUTER_API_KEY: str | None = os.getenv("OPENROUTER_API_KEY")
|
OPENROUTER_API_KEY: str | None = os.getenv("OPENROUTER_API_KEY")
|
||||||
OPENROUTER_MODEL: str = os.getenv("OPENROUTER_MODEL", "google/gemma-2-9b-it:free")
|
OPENROUTER_MODEL: str = os.getenv("OPENROUTER_MODEL", "google/gemma-2-9b-it:free")
|
||||||
|
|
||||||
|
|
@ -51,7 +51,7 @@ class Settings:
|
||||||
MINDNET_VAULT_ROOT: str = os.getenv("MINDNET_VAULT_ROOT", "./vault")
|
MINDNET_VAULT_ROOT: str = os.getenv("MINDNET_VAULT_ROOT", "./vault")
|
||||||
MINDNET_TYPES_FILE: str = os.getenv("MINDNET_TYPES_FILE", "config/types.yaml")
|
MINDNET_TYPES_FILE: str = os.getenv("MINDNET_TYPES_FILE", "config/types.yaml")
|
||||||
|
|
||||||
# --- WP-04 Retriever Gewichte (Semantik vs. Graph) ---
|
# --- WP-04 Retriever Gewichte ---
|
||||||
RETRIEVER_W_SEM: float = float(os.getenv("MINDNET_WP04_W_SEM", "0.70"))
|
RETRIEVER_W_SEM: float = float(os.getenv("MINDNET_WP04_W_SEM", "0.70"))
|
||||||
RETRIEVER_W_EDGE: float = float(os.getenv("MINDNET_WP04_W_EDGE", "0.25"))
|
RETRIEVER_W_EDGE: float = float(os.getenv("MINDNET_WP04_W_EDGE", "0.25"))
|
||||||
RETRIEVER_W_CENT: float = float(os.getenv("MINDNET_WP04_W_CENT", "0.05"))
|
RETRIEVER_W_CENT: float = float(os.getenv("MINDNET_WP04_W_CENT", "0.05"))
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ DESCRIPTION: Haupt-Ingestion-Logik. Transformiert Markdown in den Graphen (Notes
|
||||||
WP-22: Integration von Content Lifecycle (Status Gate) und Edge Registry Validation.
|
WP-22: Integration von Content Lifecycle (Status Gate) und Edge Registry Validation.
|
||||||
WP-22: Kontextsensitive Kanten-Validierung mit Fundort-Reporting (Zeilennummern).
|
WP-22: Kontextsensitive Kanten-Validierung mit Fundort-Reporting (Zeilennummern).
|
||||||
WP-22: Multi-Hash Refresh für konsistente Change Detection.
|
WP-22: Multi-Hash Refresh für konsistente Change Detection.
|
||||||
VERSION: 2.11.0 (WP-20 Full Integration: Hybrid Smart Edges)
|
VERSION: 2.11.1 (WP-20 Quota Protection: OpenRouter Priority)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: app.core.parser, app.core.note_payload, app.core.chunker, app.services.llm_service, app.services.edge_registry
|
DEPENDENCIES: app.core.parser, app.core.note_payload, app.core.chunker, app.services.llm_service, app.services.edge_registry
|
||||||
EXTERNAL_CONFIG: config/types.yaml, config/prompts.yaml
|
EXTERNAL_CONFIG: config/types.yaml, config/prompts.yaml
|
||||||
|
|
@ -111,7 +111,7 @@ class IngestionService:
|
||||||
self.dim = self.cfg.dim if hasattr(self.cfg, 'dim') else self.settings.VECTOR_SIZE
|
self.dim = self.cfg.dim if hasattr(self.cfg, 'dim') else self.settings.VECTOR_SIZE
|
||||||
self.registry = load_type_registry()
|
self.registry = load_type_registry()
|
||||||
self.embedder = EmbeddingsClient()
|
self.embedder = EmbeddingsClient()
|
||||||
self.llm = LLMService() # WP-20 Integration
|
self.llm = LLMService()
|
||||||
|
|
||||||
# Change Detection Modus (full oder body)
|
# Change Detection Modus (full oder body)
|
||||||
self.active_hash_mode = os.getenv("MINDNET_CHANGE_DETECTION_MODE", "full")
|
self.active_hash_mode = os.getenv("MINDNET_CHANGE_DETECTION_MODE", "full")
|
||||||
|
|
@ -135,32 +135,36 @@ class IngestionService:
|
||||||
async def _perform_smart_edge_allocation(self, text: str, note_id: str) -> List[Dict]:
|
async def _perform_smart_edge_allocation(self, text: str, note_id: str) -> List[Dict]:
|
||||||
"""
|
"""
|
||||||
WP-20: Nutzt den Hybrid LLM Service für die semantische Kanten-Extraktion.
|
WP-20: Nutzt den Hybrid LLM Service für die semantische Kanten-Extraktion.
|
||||||
Verwendet provider-spezifische Prompts aus der config.
|
QUOTEN-SCHUTZ: Priorisiert OpenRouter (Gemma), um Gemini-Tageslimits zu schonen.
|
||||||
"""
|
"""
|
||||||
# Wir priorisieren Gemma für Ingestion, falls verfügbar (OpenRouter/Cloud)
|
# Bestimme den Provider für die Ingestion (OpenRouter bevorzugt, falls Key vorhanden)
|
||||||
model = getattr(self.settings, "GEMMA_MODEL", None)
|
provider = "openrouter" if getattr(self.settings, "OPENROUTER_API_KEY", None) else self.settings.MINDNET_LLM_PROVIDER
|
||||||
provider = self.settings.MINDNET_LLM_PROVIDER
|
|
||||||
|
|
||||||
template = self.llm.get_prompt("edge_extraction")
|
# Nutze Gemma-Modell für hohe Ingestion-Quoten (14.4K RPD) via OpenRouter oder Google
|
||||||
|
model = getattr(self.settings, "GEMMA_MODEL", None)
|
||||||
|
|
||||||
|
# Hole Prompt aus der YAML (Kaskade: Provider -> gemini -> ollama)
|
||||||
|
template = self.llm.get_prompt("edge_extraction", provider)
|
||||||
prompt = template.format(text=text[:6000], note_id=note_id)
|
prompt = template.format(text=text[:6000], note_id=note_id)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Hintergrund-Task mit Semaphore
|
# Hintergrund-Task mit Semaphore via LLMService
|
||||||
response_json = await self.llm.generate_raw_response(
|
response_json = await self.llm.generate_raw_response(
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
priority="background",
|
priority="background",
|
||||||
force_json=True,
|
force_json=True,
|
||||||
|
provider=provider,
|
||||||
model_override=model
|
model_override=model
|
||||||
)
|
)
|
||||||
data = json.loads(response_json)
|
data = json.loads(response_json)
|
||||||
|
|
||||||
# Provenance für die EdgeRegistry
|
# Provenance für die EdgeRegistry Dokumentation
|
||||||
for item in data:
|
for item in data:
|
||||||
item["provenance"] = "semantic_ai"
|
item["provenance"] = "semantic_ai"
|
||||||
item["line"] = f"ai-{provider}"
|
item["line"] = f"ai-{provider}"
|
||||||
return data
|
return data
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Smart Edge Allocation skipped for {note_id}: {e}")
|
logger.warning(f"Smart Edge Allocation failed for {note_id} on {provider}: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
async def process_file(
|
async def process_file(
|
||||||
|
|
@ -214,7 +218,7 @@ class IngestionService:
|
||||||
logger.error(f"Payload build failed: {e}")
|
logger.error(f"Payload build failed: {e}")
|
||||||
return {**result, "error": f"Payload build failed: {str(e)}"}
|
return {**result, "error": f"Payload build failed: {str(e)}"}
|
||||||
|
|
||||||
# 4. Change Detection
|
# 4. Change Detection (Multi-Hash)
|
||||||
old_payload = None
|
old_payload = None
|
||||||
if not force_replace:
|
if not force_replace:
|
||||||
old_payload = self._fetch_note_payload(note_id)
|
old_payload = self._fetch_note_payload(note_id)
|
||||||
|
|
@ -255,7 +259,7 @@ class IngestionService:
|
||||||
edges = []
|
edges = []
|
||||||
context = {"file": file_path, "note_id": note_id}
|
context = {"file": file_path, "note_id": note_id}
|
||||||
|
|
||||||
# A. Explizite User-Kanten
|
# A. Explizite User-Kanten (Wiki-Links)
|
||||||
explicit_edges = extract_edges_with_context(parsed)
|
explicit_edges = extract_edges_with_context(parsed)
|
||||||
for e in explicit_edges:
|
for e in explicit_edges:
|
||||||
e["kind"] = edge_registry.resolve(edge_type=e["kind"], provenance="explicit", context={**context, "line": e.get("line")})
|
e["kind"] = edge_registry.resolve(edge_type=e["kind"], provenance="explicit", context={**context, "line": e.get("line")})
|
||||||
|
|
@ -267,7 +271,7 @@ class IngestionService:
|
||||||
e["kind"] = edge_registry.resolve(edge_type=e.get("kind"), provenance="semantic_ai", context={**context, "line": e.get("line")})
|
e["kind"] = edge_registry.resolve(edge_type=e.get("kind"), provenance="semantic_ai", context={**context, "line": e.get("line")})
|
||||||
edges.append(e)
|
edges.append(e)
|
||||||
|
|
||||||
# C. System-Kanten
|
# C. System-Kanten (Graph-Struktur)
|
||||||
try:
|
try:
|
||||||
raw_system_edges = build_edges_for_note(note_id, chunk_pls, note_level_references=note_pl.get("references", []), include_note_scope_refs=note_scope_refs)
|
raw_system_edges = build_edges_for_note(note_id, chunk_pls, note_level_references=note_pl.get("references", []), include_note_scope_refs=note_scope_refs)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
|
|
@ -282,7 +286,7 @@ class IngestionService:
|
||||||
logger.error(f"Processing failed: {e}", exc_info=True)
|
logger.error(f"Processing failed: {e}", exc_info=True)
|
||||||
return {**result, "error": f"Processing failed: {str(e)}"}
|
return {**result, "error": f"Processing failed: {str(e)}"}
|
||||||
|
|
||||||
# 6. Upsert
|
# 6. Upsert in Qdrant
|
||||||
try:
|
try:
|
||||||
if purge_before and has_old: self._purge_artifacts(note_id)
|
if purge_before and has_old: self._purge_artifacts(note_id)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,10 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/services/llm_service.py
|
FILE: app/services/llm_service.py
|
||||||
DESCRIPTION: Hybrid-Client für Ollama, Google GenAI und OpenRouter.
|
DESCRIPTION: Hybrid-Client für Ollama, Google GenAI (Gemini) und OpenRouter.
|
||||||
Verwaltet provider-spezifische Prompts und Background-Last.
|
Verwaltet provider-spezifische Prompts und Background-Last.
|
||||||
VERSION: 3.3.0 (Full SDK Integration)
|
WP-20: Optimiertes Fallback-Management zum Schutz von Cloud-Quoten.
|
||||||
|
VERSION: 3.3.1
|
||||||
|
STATUS: Active
|
||||||
"""
|
"""
|
||||||
import httpx
|
import httpx
|
||||||
import yaml
|
import yaml
|
||||||
|
|
@ -11,7 +13,7 @@ import asyncio
|
||||||
import json
|
import json
|
||||||
from google import genai
|
from google import genai
|
||||||
from google.genai import types
|
from google.genai import types
|
||||||
from openai import AsyncOpenAI # Für OpenRouter
|
from openai import AsyncOpenAI # Für OpenRouter (OpenAI-kompatibel)
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, Dict, Any, Literal
|
from typing import Optional, Dict, Any, Literal
|
||||||
from app.config import get_settings
|
from app.config import get_settings
|
||||||
|
|
@ -19,16 +21,17 @@ from app.config import get_settings
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class LLMService:
|
class LLMService:
|
||||||
|
# GLOBALER SEMAPHOR für Hintergrund-Last Steuerung (WP-06)
|
||||||
_background_semaphore = None
|
_background_semaphore = None
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.settings = get_settings()
|
self.settings = get_settings()
|
||||||
self.prompts = self._load_prompts()
|
self.prompts = self._load_prompts()
|
||||||
|
|
||||||
# WP-06: Semaphore-Initialisierung
|
# Initialisiere Semaphore einmalig auf Klassen-Ebene
|
||||||
if LLMService._background_semaphore is None:
|
if LLMService._background_semaphore is None:
|
||||||
limit = self.settings.BACKGROUND_LIMIT
|
limit = getattr(self.settings, "BACKGROUND_LIMIT", 2)
|
||||||
logger.info(f"🚦 LLMService: Background Semaphore initialized with limit: {limit}")
|
logger.info(f"🚦 LLMService: Initializing Background Semaphore with limit: {limit}")
|
||||||
LLMService._background_semaphore = asyncio.Semaphore(limit)
|
LLMService._background_semaphore = asyncio.Semaphore(limit)
|
||||||
|
|
||||||
# 1. Lokaler Ollama Client
|
# 1. Lokaler Ollama Client
|
||||||
|
|
@ -53,6 +56,7 @@ class LLMService:
|
||||||
logger.info("🛰️ LLMService: OpenRouter Integration active.")
|
logger.info("🛰️ LLMService: OpenRouter Integration active.")
|
||||||
|
|
||||||
def _load_prompts(self) -> dict:
|
def _load_prompts(self) -> dict:
|
||||||
|
"""Lädt die Prompt-Konfiguration aus der YAML-Datei."""
|
||||||
path = Path(self.settings.PROMPTS_PATH)
|
path = Path(self.settings.PROMPTS_PATH)
|
||||||
if not path.exists(): return {}
|
if not path.exists(): return {}
|
||||||
try:
|
try:
|
||||||
|
|
@ -62,11 +66,16 @@ class LLMService:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def get_prompt(self, key: str, provider: str = None) -> str:
|
def get_prompt(self, key: str, provider: str = None) -> str:
|
||||||
"""Hole provider-spezifisches Template mit Fallback-Kaskade."""
|
"""
|
||||||
|
Hole provider-spezifisches Template mit intelligenter Text-Kaskade.
|
||||||
|
HINWEIS: Dies ist nur ein Text-Lookup und verbraucht kein API-Kontingent.
|
||||||
|
Kaskade: Gewählter Provider -> Gemini (Cloud-Stil) -> Ollama (Basis-Stil).
|
||||||
|
"""
|
||||||
active_provider = provider or self.settings.MINDNET_LLM_PROVIDER
|
active_provider = provider or self.settings.MINDNET_LLM_PROVIDER
|
||||||
data = self.prompts.get(key, "")
|
data = self.prompts.get(key, "")
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
return data.get(active_provider, data.get("ollama", ""))
|
# Wir versuchen erst den Provider, dann Gemini (weil ähnlich leistungsfähig), dann Ollama
|
||||||
|
return data.get(active_provider, data.get("gemini", data.get("ollama", "")))
|
||||||
return str(data)
|
return str(data)
|
||||||
|
|
||||||
async def generate_raw_response(
|
async def generate_raw_response(
|
||||||
|
|
@ -76,35 +85,43 @@ class LLMService:
|
||||||
provider: Optional[str] = None,
|
provider: Optional[str] = None,
|
||||||
model_override: Optional[str] = None
|
model_override: Optional[str] = None
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Einstiegspunkt mit Priority-Handling."""
|
"""Haupteinstiegspunkt für LLM-Anfragen mit Priorisierung."""
|
||||||
target_provider = provider or self.settings.MINDNET_LLM_PROVIDER
|
target_provider = provider or self.settings.MINDNET_LLM_PROVIDER
|
||||||
|
|
||||||
if priority == "background":
|
if priority == "background":
|
||||||
async with LLMService._background_semaphore:
|
async with LLMService._background_semaphore:
|
||||||
return await self._dispatch(target_provider, prompt, system, force_json, max_retries, base_delay, model_override)
|
return await self._dispatch(target_provider, prompt, system, force_json, max_retries, base_delay, model_override)
|
||||||
|
|
||||||
return await self._dispatch(target_provider, prompt, system, force_json, max_retries, base_delay, model_override)
|
return await self._dispatch(target_provider, prompt, system, force_json, max_retries, base_delay, model_override)
|
||||||
|
|
||||||
async def _dispatch(self, provider, prompt, system, force_json, max_retries, base_delay, model_override):
|
async def _dispatch(self, provider, prompt, system, force_json, max_retries, base_delay, model_override):
|
||||||
|
"""Routet die Anfrage an den physikalischen API-Provider."""
|
||||||
try:
|
try:
|
||||||
if provider == "openrouter" and self.openrouter_client:
|
if provider == "openrouter" and self.openrouter_client:
|
||||||
return await self._execute_openrouter(prompt, system, force_json, model_override)
|
return await self._execute_openrouter(prompt, system, force_json, model_override)
|
||||||
|
|
||||||
if provider == "gemini" and self.google_client:
|
if provider == "gemini" and self.google_client:
|
||||||
return await self._execute_google(prompt, system, force_json, model_override)
|
return await self._execute_google(prompt, system, force_json, model_override)
|
||||||
|
|
||||||
|
# Default/Fallback zu Ollama
|
||||||
return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
|
return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
# QUOTEN-SCHUTZ: Wenn Cloud (OpenRouter/Gemini) fehlschlägt,
|
||||||
|
# gehen wir IMMER zu Ollama, niemals von OpenRouter zu Gemini.
|
||||||
if self.settings.LLM_FALLBACK_ENABLED and provider != "ollama":
|
if self.settings.LLM_FALLBACK_ENABLED and provider != "ollama":
|
||||||
logger.warning(f"🔄 Provider {provider} failed: {e}. Falling back to Ollama.")
|
logger.warning(f"🔄 Provider {provider} failed: {e}. Falling back to LOCAL OLLAMA to protect cloud quotas.")
|
||||||
return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
|
return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
async def _execute_google(self, prompt, system, force_json, model_override):
|
async def _execute_google(self, prompt, system, force_json, model_override):
|
||||||
"""Native Google SDK Integration."""
|
"""Native Google SDK Integration (Gemini)."""
|
||||||
model = model_override or self.settings.GEMINI_MODEL
|
model = model_override or self.settings.GEMINI_MODEL
|
||||||
config = types.GenerateContentConfig(
|
config = types.GenerateContentConfig(
|
||||||
system_instruction=system,
|
system_instruction=system,
|
||||||
response_mime_type="application/json" if force_json else "text/plain"
|
response_mime_type="application/json" if force_json else "text/plain"
|
||||||
)
|
)
|
||||||
# Synchroner SDK-Call in Thread auslagern
|
# SDK Call in Thread auslagern, da die Google API blocking sein kann
|
||||||
response = await asyncio.to_thread(
|
response = await asyncio.to_thread(
|
||||||
self.google_client.models.generate_content,
|
self.google_client.models.generate_content,
|
||||||
model=model, contents=prompt, config=config
|
model=model, contents=prompt, config=config
|
||||||
|
|
@ -112,10 +129,11 @@ class LLMService:
|
||||||
return response.text.strip()
|
return response.text.strip()
|
||||||
|
|
||||||
async def _execute_openrouter(self, prompt, system, force_json, model_override):
|
async def _execute_openrouter(self, prompt, system, force_json, model_override):
|
||||||
"""OpenRouter (OpenAI-kompatibel)."""
|
"""OpenRouter API Integration (OpenAI-kompatibel)."""
|
||||||
model = model_override or self.settings.OPENROUTER_MODEL
|
model = model_override or getattr(self.settings, "OPENROUTER_MODEL", "google/gemma-2-9b-it:free")
|
||||||
messages = []
|
messages = []
|
||||||
if system: messages.append({"role": "system", "content": system})
|
if system:
|
||||||
|
messages.append({"role": "system", "content": system})
|
||||||
messages.append({"role": "user", "content": prompt})
|
messages.append({"role": "user", "content": prompt})
|
||||||
|
|
||||||
response = await self.openrouter_client.chat.completions.create(
|
response = await self.openrouter_client.chat.completions.create(
|
||||||
|
|
@ -126,10 +144,15 @@ class LLMService:
|
||||||
return response.choices[0].message.content.strip()
|
return response.choices[0].message.content.strip()
|
||||||
|
|
||||||
async def _execute_ollama(self, prompt, system, force_json, max_retries, base_delay):
|
async def _execute_ollama(self, prompt, system, force_json, max_retries, base_delay):
|
||||||
"""Ollama mit exponentiellem Backoff."""
|
"""Lokaler Ollama Call mit exponentiellem Backoff."""
|
||||||
payload = {
|
payload = {
|
||||||
"model": self.settings.LLM_MODEL, "prompt": prompt, "stream": False,
|
"model": self.settings.LLM_MODEL,
|
||||||
"options": {"temperature": 0.1 if force_json else 0.7, "num_ctx": 8192}
|
"prompt": prompt,
|
||||||
|
"stream": False,
|
||||||
|
"options": {
|
||||||
|
"temperature": 0.1 if force_json else 0.7,
|
||||||
|
"num_ctx": 8192
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if force_json: payload["format"] = "json"
|
if force_json: payload["format"] = "json"
|
||||||
if system: payload["system"] = system
|
if system: payload["system"] = system
|
||||||
|
|
@ -142,18 +165,28 @@ class LLMService:
|
||||||
return res.json().get("response", "").strip()
|
return res.json().get("response", "").strip()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
attempt += 1
|
attempt += 1
|
||||||
if attempt > max_retries: raise e
|
if attempt > max_retries:
|
||||||
wait = base_delay * (2 ** (attempt - 1))
|
logger.error(f"Ollama Error after {attempt} retries: {e}")
|
||||||
logger.warning(f"⚠️ Ollama retry {attempt} in {wait}s...")
|
raise e
|
||||||
await asyncio.sleep(wait)
|
wait_time = base_delay * (2 ** (attempt - 1))
|
||||||
|
logger.warning(f"⚠️ Ollama attempt {attempt} failed. Retrying in {wait_time}s...")
|
||||||
|
await asyncio.sleep(wait_time)
|
||||||
|
|
||||||
async def generate_rag_response(self, query: str, context_str: str) -> str:
|
async def generate_rag_response(self, query: str, context_str: str) -> str:
|
||||||
"""Vollständiger RAG-Wrapper."""
|
"""Vollständiges RAG Chat-Interface."""
|
||||||
provider = self.settings.MINDNET_LLM_PROVIDER
|
provider = self.settings.MINDNET_LLM_PROVIDER
|
||||||
system = self.get_prompt("system_prompt", provider)
|
system_prompt = self.get_prompt("system_prompt", provider)
|
||||||
template = self.get_prompt("rag_template", provider)
|
rag_template = self.get_prompt("rag_template", provider)
|
||||||
final_prompt = template.format(context_str=context_str, query=query)
|
|
||||||
return await self.generate_raw_response(final_prompt, system=system, priority="realtime")
|
final_prompt = rag_template.format(context_str=context_str, query=query)
|
||||||
|
|
||||||
|
return await self.generate_raw_response(
|
||||||
|
final_prompt,
|
||||||
|
system=system_prompt,
|
||||||
|
priority="realtime"
|
||||||
|
)
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
await self.ollama_client.aclose()
|
"""Schließt die HTTP-Verbindungen."""
|
||||||
|
if self.ollama_client:
|
||||||
|
await self.ollama_client.aclose()
|
||||||
|
|
@ -32,6 +32,8 @@ rag_template:
|
||||||
Analysiere diesen Kontext meines digitalen Zwillings:
|
Analysiere diesen Kontext meines digitalen Zwillings:
|
||||||
{context_str}
|
{context_str}
|
||||||
Beantworte die Anfrage detailliert und prüfe auf Widersprüche: {query}
|
Beantworte die Anfrage detailliert und prüfe auf Widersprüche: {query}
|
||||||
|
openrouter: "Kontext-Analyse für Gemma/Llama: {context_str}\n\nAnfrage: {query}"
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# 2. DECISION: Strategie & Abwägung (Intent: DECISION)
|
# 2. DECISION: Strategie & Abwägung (Intent: DECISION)
|
||||||
|
|
@ -59,7 +61,7 @@ decision_template:
|
||||||
gemini: |
|
gemini: |
|
||||||
Agiere als Senior Strategy Consultant. Nutze den Kontext {context_str}, um die Frage {query}
|
Agiere als Senior Strategy Consultant. Nutze den Kontext {context_str}, um die Frage {query}
|
||||||
tiefgreifend gegen meine langfristigen Ziele abzuwägen.
|
tiefgreifend gegen meine langfristigen Ziele abzuwägen.
|
||||||
|
openrouter: "Strategischer Check (OpenRouter): {query}\n\nReferenzdaten: {context_str}"
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# 3. EMPATHY: Der Spiegel / "Ich"-Modus (Intent: EMPATHY)
|
# 3. EMPATHY: Der Spiegel / "Ich"-Modus (Intent: EMPATHY)
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
|
|
@ -179,3 +181,7 @@ edge_allocation_template:
|
||||||
Finde auch implizite Verbindungen.
|
Finde auch implizite Verbindungen.
|
||||||
JSON: [{"to": "X", "kind": "Y", "reason": "Z"}].
|
JSON: [{"to": "X", "kind": "Y", "reason": "Z"}].
|
||||||
TEXT: {text}
|
TEXT: {text}
|
||||||
|
openrouter: |
|
||||||
|
Analysiere den Text für den Graphen. Identifiziere semantische Verbindungen.
|
||||||
|
Output JSON: [{"to": "X", "kind": "Y"}].
|
||||||
|
Text: {text}
|
||||||
Loading…
Reference in New Issue
Block a user