Model WP20 revised with ChatGPT
This commit is contained in:
parent 5278c75ac1
commit 4ab44e36a2
@@ -4,7 +4,9 @@ DESCRIPTION: Hybrid client for Ollama, Google GenAI (Gemini) and OpenRouter.
Manages provider-specific prompts and background load.
WP-20: Optimized fallback management to protect cloud quotas.
WP-20 Fix: Bulletproof prompt resolution for format() calls.
-VERSION: 3.3.2
+WP-22/JSON: Optional JSON schema + strict mode (for OpenRouter structured outputs),
+WITHOUT breaking changes (new parameters only at the end).
+VERSION: 3.3.3
STATUS: Active
"""
import httpx
@@ -14,13 +16,14 @@ import asyncio
import json
from google import genai
from google.genai import types
from openai import AsyncOpenAI  # for OpenRouter (OpenAI-compatible)
from pathlib import Path
from typing import Optional, Dict, Any, Literal
from app.config import get_settings

logger = logging.getLogger(__name__)


class LLMService:
    # GLOBAL SEMAPHORE for background-load throttling (WP-06)
    _background_semaphore = None
@@ -28,16 +31,16 @@ class LLMService:
    def __init__(self):
        self.settings = get_settings()
        self.prompts = self._load_prompts()

        # Initialize the semaphore once at class level
        if LLMService._background_semaphore is None:
            limit = getattr(self.settings, "BACKGROUND_LIMIT", 2)
            logger.info(f"🚦 LLMService: Initializing Background Semaphore with limit: {limit}")
            LLMService._background_semaphore = asyncio.Semaphore(limit)

        # 1. Local Ollama client
        self.ollama_client = httpx.AsyncClient(
            base_url=self.settings.OLLAMA_URL,
            timeout=httpx.Timeout(self.settings.LLM_TIMEOUT)
        )
@@ -74,63 +77,126 @@ class LLMService:
        Fetch the provider-specific template with an intelligent text cascade.
        NOTE: This is only a text lookup and does not consume any API quota.
        Cascade: selected provider -> Gemini (cloud style) -> Ollama (base style).

        WP-20 Fix: Guarantees a string return value to avoid AttributeError.
        """
        active_provider = provider or self.settings.MINDNET_LLM_PROVIDER
        data = self.prompts.get(key, "")

        if isinstance(data, dict):
            # Try the requested provider first, then Gemini (similarly capable), then Ollama
            val = data.get(active_provider, data.get("gemini", data.get("ollama", "")))

            # If val is still a dict due to a YAML error, extract the first string
            if isinstance(val, dict):
                logger.warning(f"⚠️ [LLMService] Nested dictionary detected for key '{key}'. Using first entry.")
                val = next(iter(val.values()), "") if val else ""
            return str(val)

        return str(data)

    async def generate_raw_response(
-        self, prompt: str, system: str = None, force_json: bool = False,
-        max_retries: int = 2, base_delay: float = 2.0,
+        self,
+        prompt: str,
+        system: str = None,
+        force_json: bool = False,
+        max_retries: int = 2,
+        base_delay: float = 2.0,
        priority: Literal["realtime", "background"] = "realtime",
        provider: Optional[str] = None,
-        model_override: Optional[str] = None
+        model_override: Optional[str] = None,
+        # --- NEW (at the end => backwards compatible!) ---
+        json_schema: Optional[Dict[str, Any]] = None,
+        json_schema_name: str = "mindnet_json",
+        strict_json_schema: bool = True
    ) -> str:
-        """Main entry point for LLM requests with prioritization."""
+        """
+        Main entry point for LLM requests with prioritization.
+
+        force_json:
+        - Ollama: uses payload["format"] = "json"
+        - Gemini: uses response_mime_type="application/json"
+        - OpenRouter: uses response_format=json_object (fallback) or json_schema (structured outputs)
+
+        json_schema + strict_json_schema (relevant for OpenRouter only):
+        - If json_schema is set AND force_json=True -> response_format.type="json_schema"
+        - strict_json_schema is passed through to OpenRouter/the provider (best effort per provider)
+        """
        target_provider = provider or self.settings.MINDNET_LLM_PROVIDER

        if priority == "background":
            async with LLMService._background_semaphore:
-                return await self._dispatch(target_provider, prompt, system, force_json, max_retries, base_delay, model_override)
-
-        return await self._dispatch(target_provider, prompt, system, force_json, max_retries, base_delay, model_override)
+                return await self._dispatch(
+                    target_provider,
+                    prompt,
+                    system,
+                    force_json,
+                    max_retries,
+                    base_delay,
+                    model_override,
+                    json_schema,
+                    json_schema_name,
+                    strict_json_schema
+                )

+        return await self._dispatch(
+            target_provider,
+            prompt,
+            system,
+            force_json,
+            max_retries,
+            base_delay,
+            model_override,
+            json_schema,
+            json_schema_name,
+            strict_json_schema
+        )

-    async def _dispatch(self, provider, prompt, system, force_json, max_retries, base_delay, model_override):
+    async def _dispatch(
+        self,
+        provider: str,
+        prompt: str,
+        system: Optional[str],
+        force_json: bool,
+        max_retries: int,
+        base_delay: float,
+        model_override: Optional[str],
+        json_schema: Optional[Dict[str, Any]],
+        json_schema_name: str,
+        strict_json_schema: bool
+    ) -> str:
        """Routes the request to the physical API provider."""
        try:
            if provider == "openrouter" and self.openrouter_client:
-                return await self._execute_openrouter(prompt, system, force_json, model_override)
+                return await self._execute_openrouter(
+                    prompt=prompt,
+                    system=system,
+                    force_json=force_json,
+                    model_override=model_override,
+                    json_schema=json_schema,
+                    json_schema_name=json_schema_name,
+                    strict_json_schema=strict_json_schema
+                )

            if provider == "gemini" and self.google_client:
                return await self._execute_google(prompt, system, force_json, model_override)

            # Default / fallback to Ollama
            return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)

        except Exception as e:
            # QUOTA PROTECTION: if a cloud provider (OpenRouter/Gemini) fails,
            # we ALWAYS fall back to Ollama, never from OpenRouter to Gemini.
            if self.settings.LLM_FALLBACK_ENABLED and provider != "ollama":
-                logger.warning(f"🔄 Provider {provider} failed: {e}. Falling back to LOCAL OLLAMA to protect cloud quotas.")
+                logger.warning(
+                    f"🔄 Provider {provider} failed: {e}. Falling back to LOCAL OLLAMA to protect cloud quotas."
+                )
                return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
            raise e

    async def _execute_google(self, prompt, system, force_json, model_override):
        """Native Google SDK integration (Gemini)."""
-        # Uses GEMINI_MODEL from config.py if no override (e.g. for ingestion) was passed
+        # Uses GEMINI_MODEL from config.py if no override was passed
        model = model_override or self.settings.GEMINI_MODEL
        config = types.GenerateContentConfig(
            system_instruction=system,
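As a usage illustration for the new parameters (not part of the commit: the schema, key names, and call site below are invented, only the generate_raw_response signature comes from the diff above), a strict-mode schema for structured outputs typically needs an object root with additionalProperties disabled:

```python
# Minimal calling sketch, assuming LLMService is importable via app.services.llm_service.
import asyncio
from app.services.llm_service import LLMService

# Hypothetical schema: an object wrapping a flat list of "kind:target" strings.
EDGE_LIST_SCHEMA = {
    "type": "object",
    "properties": {
        "edges": {"type": "array", "items": {"type": "string"}}
    },
    "required": ["edges"],
    "additionalProperties": False
}

async def demo() -> None:
    llm = LLMService()
    raw = await llm.generate_raw_response(
        prompt="Gib die relevanten Kanten als JSON zurueck.",
        force_json=True,                  # required for the json_schema branch
        provider="openrouter",            # the schema is only honored on the OpenRouter path
        json_schema=EDGE_LIST_SCHEMA,
        json_schema_name="edge_list",     # hypothetical name
        strict_json_schema=True,
        priority="background"             # routed through the class-level semaphore
    )
    print(raw)
    await llm.close()

if __name__ == "__main__":
    asyncio.run(demo())
```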
@@ -143,19 +209,52 @@ class LLMService:
        )
        return response.text.strip()

-    async def _execute_openrouter(self, prompt, system, force_json, model_override):
-        """OpenRouter API integration (OpenAI-compatible)."""
-        # Uses OPENROUTER_MODEL from config.py (v0.6.2)
+    async def _execute_openrouter(
+        self,
+        prompt: str,
+        system: Optional[str],
+        force_json: bool,
+        model_override: Optional[str],
+        # --- NEW (optional) ---
+        json_schema: Optional[Dict[str, Any]] = None,
+        json_schema_name: str = "mindnet_json",
+        strict_json_schema: bool = True
+    ) -> str:
+        """
+        OpenRouter API integration (OpenAI-compatible).
+
+        force_json=True:
+        - without json_schema -> response_format={"type": "json_object"}
+        - with json_schema    -> response_format={"type": "json_schema", "json_schema": {..., "strict": True}}
+
+        Important: do NOT send response_format as None (more robust across SDKs/providers).
+        """
+        # Uses OPENROUTER_MODEL from config.py
        model = model_override or self.settings.OPENROUTER_MODEL
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

+        kwargs: Dict[str, Any] = {}
+
+        if force_json:
+            if json_schema:
+                kwargs["response_format"] = {
+                    "type": "json_schema",
+                    "json_schema": {
+                        "name": json_schema_name,
+                        "strict": strict_json_schema,
+                        "schema": json_schema
+                    }
+                }
+            else:
+                kwargs["response_format"] = {"type": "json_object"}
+
        response = await self.openrouter_client.chat.completions.create(
            model=model,
            messages=messages,
-            response_format={"type": "json_object"} if force_json else None
+            **kwargs
        )
        return response.choices[0].message.content.strip()
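For reference, the two response_format payloads the branch above produces look roughly like this (values illustrative); whether strict schemas are actually enforced depends on the downstream provider behind OpenRouter, which is why strict_json_schema is best effort:

```python
# Plain JSON mode (no schema supplied):
response_format_json_object = {"type": "json_object"}

# Structured outputs (schema supplied by the caller):
response_format_json_schema = {
    "type": "json_schema",
    "json_schema": {
        "name": "mindnet_json",   # default from json_schema_name
        "strict": True,           # from strict_json_schema
        "schema": {"type": "object", "properties": {}, "additionalProperties": False}
    }
}
```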
@@ -167,11 +266,13 @@ class LLMService:
            "stream": False,
            "options": {
                "temperature": 0.1 if force_json else 0.7,
                "num_ctx": 8192
            }
        }
-        if force_json: payload["format"] = "json"
-        if system: payload["system"] = system
+        if force_json:
+            payload["format"] = "json"
+        if system:
+            payload["system"] = system

        attempt = 0
        while True:
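For orientation, the complete request body the Ollama branch assembles would look roughly as follows. Only the options block and the two conditional keys appear in the hunk above; the model name, prompt, and the assumption that this targets Ollama's standard /api/generate fields are illustrative:

```python
# Assumed full payload with force_json=True and a system prompt set.
payload = {
    "model": "llama3",                        # placeholder model name
    "prompt": "Beantworte strikt als JSON.",  # placeholder prompt
    "stream": False,
    "options": {
        "temperature": 0.1,   # 0.1 when force_json, else 0.7
        "num_ctx": 8192
    },
    "format": "json",         # added by the force_json branch
    "system": "Du bist ein Assistent."        # added only if a system prompt is set
}
```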
@@ -193,16 +294,16 @@ class LLMService:
        provider = self.settings.MINDNET_LLM_PROVIDER
        system_prompt = self.get_prompt("system_prompt", provider)
        rag_template = self.get_prompt("rag_template", provider)

        final_prompt = rag_template.format(context_str=context_str, query=query)

        return await self.generate_raw_response(
            final_prompt,
            system=system_prompt,
            priority="realtime"
        )

    async def close(self):
        """Closes the HTTP connections."""
        if self.ollama_client:
            await self.ollama_client.aclose()
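To show how the two get_prompt lookups above plug together, a sketch with an invented rag_template; only the placeholder names {context_str} and {query} are taken from the format() call above, the template text itself lives in the prompts file and is not part of this diff:

```python
# Hypothetical template text with the two placeholders used by the RAG path.
rag_template = (
    "Beantworte die Frage nur mit dem folgenden Kontext.\n"
    "KONTEXT:\n{context_str}\n"
    "FRAGE: {query}"
)

final_prompt = rag_template.format(
    context_str="Notiz A: ...\nNotiz B: ...",
    query="Wie haengen A und B zusammen?"
)
```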
@@ -3,7 +3,7 @@ FILE: app/services/semantic_analyzer.py
DESCRIPTION: AI-assisted edge validation. Uses the LLM (background priority) to assign edges precisely to a chunk.
WP-20 Fix: Full compatibility with the hardened LLMService (v3.3.2) cascade.
WP-22: Integration of valid_types to avoid hallucinations.
-VERSION: 2.2.2
+VERSION: 2.2.3
STATUS: Active
DEPENDENCIES: app.services.llm_service, app.services.edge_registry, json, logging
LAST_ANALYSIS: 2025-12-23
@@ -29,26 +29,39 @@ class SemanticAnalyzer:
        """
        Checks whether a string is a valid edge in the 'kind:target' format.
        Prevents LLM chatter from slipping through as an edge.
+
+        WP-22 extension:
+        - kind must be part of the controlled vocabulary (if valid_types is available).
        """
        if not isinstance(edge_str, str) or ":" not in edge_str:
            return False

        parts = edge_str.split(":", 1)
        kind = parts[0].strip()
        target = parts[1].strip()

        # Rule 1: a 'kind' (relation type) must not contain spaces.
        if " " in kind:
            return False

        # Rule 2: plausible length for the type
        if len(kind) > 40 or len(kind) < 2:
            return False

        # Rule 3: target must not be empty
        if not target:
            return False

+        # WP-22: enforce the controlled vocabulary (if present/loaded)
+        try:
+            if hasattr(edge_registry, "valid_types") and edge_registry.valid_types:
+                if kind not in edge_registry.valid_types:
+                    return False
+        except Exception:
+            # On registry problems, prefer not to crash -> conservatively rejecting would also be possible,
+            # but we stay compatible and robust.
+            pass
+
        return True

    async def assign_edges_to_chunk(self, chunk_text: str, all_edges: List[str], note_type: str) -> List[str]:
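A few illustrative inputs for the 'kind:target' check above (the method name _is_valid_edge, the vocabulary entry, and the example strings are assumptions for this sketch; the rules themselves come from the diff):

```python
from app.services.semantic_analyzer import get_semantic_analyzer

analyzer = get_semantic_analyzer()

# Assuming the controlled vocabulary contains "verweist_auf":
analyzer._is_valid_edge("verweist_auf:Projekt Alpha")   # True: known kind, non-empty target
analyzer._is_valid_edge("is related to:Projekt Alpha")  # False: kind contains spaces
analyzer._is_valid_edge("verweist_auf:")                # False: empty target
analyzer._is_valid_edge("Das ist keine Kante")          # False: no 'kind:target' separator
```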
@@ -61,7 +74,7 @@ class SemanticAnalyzer:

        # 1. Load the prompt via get_prompt (which handles the provider cascade automatically)
        prompt_template = self.llm.get_prompt("edge_allocation_template")

        # Safety check for the format() call
        if not prompt_template or isinstance(prompt_template, dict):
            logger.warning("⚠️ [SemanticAnalyzer] Prompt 'edge_allocation_template' konnte nicht als String geladen werden. Nutze Not-Fallback.")
@@ -77,14 +90,14 @@ class SemanticAnalyzer:
        edge_registry.ensure_latest()
        valid_types_str = ", ".join(sorted(list(edge_registry.valid_types)))
        edges_str = "\n".join([f"- {e}" for e in all_edges])

        # LOG: request info
        logger.debug(f"🔍 [SemanticAnalyzer] Request: {len(chunk_text)} chars Text, {len(all_edges)} Candidates.")

        # 3. Fill the prompt (FIX: valid_types added to fix the FormatError)
        try:
            final_prompt = prompt_template.format(
                chunk_text=chunk_text[:3500],
                edge_list=edges_str,
                valid_types=valid_types_str
            )
@@ -94,10 +107,11 @@ class SemanticAnalyzer:

        try:
            # 4. LLM call with traffic control (background priority)
+            # NOTE: no new parameters here, so this stays compatible with the current llm_service.py.
            response_json = await self.llm.generate_raw_response(
                prompt=final_prompt,
                force_json=True,
                max_retries=5,
                base_delay=5.0,
                priority="background"
            )
@@ -107,8 +121,8 @@ class SemanticAnalyzer:

            # 5. Parsing & cleaning
            clean_json = response_json.replace("```json", "").replace("```", "").strip()

            if not clean_json:
                logger.warning("⚠️ [SemanticAnalyzer] Leere Antwort vom LLM erhalten.")
                return []
@@ -122,15 +136,15 @@ class SemanticAnalyzer:

            # 6. Robust validation (list vs dict)
            raw_candidates = []

            if isinstance(data, list):
                raw_candidates = data

            elif isinstance(data, dict):
                logger.info(f"ℹ️ [SemanticAnalyzer] LLM lieferte Dict statt Liste. Versuche Reparatur.")
                for key, val in data.items():
                    if key.lower() in ["edges", "results", "kanten", "matches"] and isinstance(val, list):
                        raw_candidates.extend(val)
                    elif isinstance(val, str):
                        raw_candidates.append(f"{key}:{val}")
                    elif isinstance(val, list):
@@ -147,7 +161,7 @@ class SemanticAnalyzer:
                logger.debug(f" [SemanticAnalyzer] Invalid edge format rejected: '{e_str}'")

        final_result = [e for e in valid_edges if ":" in e]

        if final_result:
            logger.info(f"✅ [SemanticAnalyzer] Success. {len(final_result)} Kanten zugewiesen.")
        else:
|
@ -169,4 +183,4 @@ def get_semantic_analyzer():
|
|||
global _analyzer_instance
|
||||
if _analyzer_instance is None:
|
||||
_analyzer_instance = SemanticAnalyzer()
|
||||
return _analyzer_instance
|
||||
return _analyzer_instance
|
||||
|
|
|
|||
|
|
@@ -184,13 +184,13 @@ edge_allocation_template:
    ERLAUBTE TYPEN: {valid_types}
    TEXT: {chunk_text}
    KANDIDATEN: {edge_list}
-    OUTPUT: STRIKT eine flache JSON-Liste ["typ:ziel"]. Keine Objekte!
+    OUTPUT: STRIKT eine flache JSON-Liste ["typ:ziel"]. Kein Text davor/danach. Wenn nichts: []. Keine Objekte!
  openrouter: |
    Filtere relevante Kanten.
    ERLAUBTE TYPEN: {valid_types}
    TEXT: {chunk_text}
    KANDIDATEN: {edge_list}
-    Output: JSON-Liste ["typ:ziel"].
+    OUTPUT: STRIKT JSON-Liste ["typ:ziel"]. Kein Text davor/danach. Wenn nichts: [].

# ---------------------------------------------------------
# 7. SMART EDGE ALLOCATION: Extraktion (Intent: INGEST)
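For context, a minimal sketch of how LLMService.get_prompt resolves the provider-keyed entries above (the dict content is abbreviated and illustrative; the cascade order comes from the service code earlier in this commit):

```python
# Abbreviated in-memory form of the YAML section above.
prompts = {
    "edge_allocation_template": {
        "gemini": '... OUTPUT: STRIKT eine flache JSON-Liste ["typ:ziel"] ...',
        "openrouter": '... OUTPUT: STRIKT JSON-Liste ["typ:ziel"] ...',
        "ollama": "... (base variant) ..."
    }
}

# get_prompt("edge_allocation_template", "openrouter") returns the "openrouter" text;
# an unknown provider falls back to "gemini", then "ollama", and the result is always a str.
```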
@@ -221,9 +221,9 @@ edge_extraction:
    Analysiere '{note_id}'. Extrahiere semantische Beziehungen.
    ERLAUBTE TYPEN: {valid_types}
    TEXT: {text}
-    OUTPUT: STRIKT JSON-Liste von Objekten: [[{{"to": "Ziel", "kind": "typ"}}]]. Keine Erklärungen!
+    OUTPUT: STRIKT JSON-Array von Objekten: [{{"to":"Ziel","kind":"typ"}}]. Kein Text davor/danach. Wenn nichts: [].
  openrouter: |
    Wissensgraph-Extraktion für '{note_id}'.
    ERLAUBTE TYPEN: {valid_types}
    TEXT: {text}
-    OUTPUT: STRIKT JSON-Liste von Objekten: [[{{"to": "X", "kind": "Y"}}]]. Keine Dictionaries mit Schlüsseln wie 'edges'!
+    OUTPUT: STRIKT JSON-Array von Objekten: [{{"to":"X","kind":"Y"}}]. Kein Text davor/danach. Wenn nichts: []. Keine Wrapper-Objekte (z.B. kein Top-Level-Key 'edges').
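To make the expected shape concrete, a sketch of an output the edge_extraction prompts above would accept, next to the wrapper form they explicitly forbid (the example values are invented):

```python
# Accepted: a flat JSON array of objects with "to" and "kind".
valid_output = '[{"to": "Projekt Alpha", "kind": "verweist_auf"}]'

# Rejected by the prompt contract: a wrapper object with a top-level key such as "edges".
invalid_output = '{"edges": [{"to": "Projekt Alpha", "kind": "verweist_auf"}]}'

# An empty result must be an empty array, not prose.
empty_output = '[]'
```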