Mit ChatGPT überarbeitetes Modell WP20
This commit is contained in:
parent
5278c75ac1
commit
4ab44e36a2
|
|
@ -4,7 +4,9 @@ DESCRIPTION: Hybrid-Client für Ollama, Google GenAI (Gemini) und OpenRouter.
|
||||||
Verwaltet provider-spezifische Prompts und Background-Last.
|
Verwaltet provider-spezifische Prompts und Background-Last.
|
||||||
WP-20: Optimiertes Fallback-Management zum Schutz von Cloud-Quoten.
|
WP-20: Optimiertes Fallback-Management zum Schutz von Cloud-Quoten.
|
||||||
WP-20 Fix: Bulletproof Prompt-Auflösung für format() Aufrufe.
|
WP-20 Fix: Bulletproof Prompt-Auflösung für format() Aufrufe.
|
||||||
VERSION: 3.3.2
|
WP-22/JSON: Optionales JSON-Schema + strict (für OpenRouter structured outputs),
|
||||||
|
OHNE Breaking Changes (neue Parameter nur am Ende).
|
||||||
|
VERSION: 3.3.3
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
"""
|
"""
|
||||||
import httpx
|
import httpx
|
||||||
|
|
@ -21,6 +23,7 @@ from app.config import get_settings
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class LLMService:
|
class LLMService:
|
||||||
# GLOBALER SEMAPHOR für Hintergrund-Last Steuerung (WP-06)
|
# GLOBALER SEMAPHOR für Hintergrund-Last Steuerung (WP-06)
|
||||||
_background_semaphore = None
|
_background_semaphore = None
|
||||||
|
|
@ -93,26 +96,87 @@ class LLMService:
|
||||||
return str(data)
|
return str(data)
|
||||||
|
|
||||||
async def generate_raw_response(
|
async def generate_raw_response(
|
||||||
self, prompt: str, system: str = None, force_json: bool = False,
|
self,
|
||||||
max_retries: int = 2, base_delay: float = 2.0,
|
prompt: str,
|
||||||
|
system: str = None,
|
||||||
|
force_json: bool = False,
|
||||||
|
max_retries: int = 2,
|
||||||
|
base_delay: float = 2.0,
|
||||||
priority: Literal["realtime", "background"] = "realtime",
|
priority: Literal["realtime", "background"] = "realtime",
|
||||||
provider: Optional[str] = None,
|
provider: Optional[str] = None,
|
||||||
model_override: Optional[str] = None
|
model_override: Optional[str] = None,
|
||||||
|
# --- NEW (am Ende => rückwärtskompatibel!) ---
|
||||||
|
json_schema: Optional[Dict[str, Any]] = None,
|
||||||
|
json_schema_name: str = "mindnet_json",
|
||||||
|
strict_json_schema: bool = True
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Haupteinstiegspunkt für LLM-Anfragen mit Priorisierung."""
|
"""
|
||||||
|
Haupteinstiegspunkt für LLM-Anfragen mit Priorisierung.
|
||||||
|
|
||||||
|
force_json:
|
||||||
|
- Ollama: nutzt payload["format"]="json"
|
||||||
|
- Gemini: nutzt response_mime_type="application/json"
|
||||||
|
- OpenRouter: nutzt response_format=json_object (Fallback) oder json_schema (structured outputs)
|
||||||
|
|
||||||
|
json_schema + strict_json_schema (nur OpenRouter relevant):
|
||||||
|
- Wenn json_schema gesetzt ist UND force_json=True -> response_format.type="json_schema"
|
||||||
|
- strict_json_schema wird an OpenRouter/Provider weitergereicht (best effort je nach Provider)
|
||||||
|
"""
|
||||||
target_provider = provider or self.settings.MINDNET_LLM_PROVIDER
|
target_provider = provider or self.settings.MINDNET_LLM_PROVIDER
|
||||||
|
|
||||||
if priority == "background":
|
if priority == "background":
|
||||||
async with LLMService._background_semaphore:
|
async with LLMService._background_semaphore:
|
||||||
return await self._dispatch(target_provider, prompt, system, force_json, max_retries, base_delay, model_override)
|
return await self._dispatch(
|
||||||
|
target_provider,
|
||||||
|
prompt,
|
||||||
|
system,
|
||||||
|
force_json,
|
||||||
|
max_retries,
|
||||||
|
base_delay,
|
||||||
|
model_override,
|
||||||
|
json_schema,
|
||||||
|
json_schema_name,
|
||||||
|
strict_json_schema
|
||||||
|
)
|
||||||
|
|
||||||
return await self._dispatch(target_provider, prompt, system, force_json, max_retries, base_delay, model_override)
|
return await self._dispatch(
|
||||||
|
target_provider,
|
||||||
|
prompt,
|
||||||
|
system,
|
||||||
|
force_json,
|
||||||
|
max_retries,
|
||||||
|
base_delay,
|
||||||
|
model_override,
|
||||||
|
json_schema,
|
||||||
|
json_schema_name,
|
||||||
|
strict_json_schema
|
||||||
|
)
|
||||||
|
|
||||||
async def _dispatch(self, provider, prompt, system, force_json, max_retries, base_delay, model_override):
|
async def _dispatch(
|
||||||
|
self,
|
||||||
|
provider: str,
|
||||||
|
prompt: str,
|
||||||
|
system: Optional[str],
|
||||||
|
force_json: bool,
|
||||||
|
max_retries: int,
|
||||||
|
base_delay: float,
|
||||||
|
model_override: Optional[str],
|
||||||
|
json_schema: Optional[Dict[str, Any]],
|
||||||
|
json_schema_name: str,
|
||||||
|
strict_json_schema: bool
|
||||||
|
) -> str:
|
||||||
"""Routet die Anfrage an den physikalischen API-Provider."""
|
"""Routet die Anfrage an den physikalischen API-Provider."""
|
||||||
try:
|
try:
|
||||||
if provider == "openrouter" and self.openrouter_client:
|
if provider == "openrouter" and self.openrouter_client:
|
||||||
return await self._execute_openrouter(prompt, system, force_json, model_override)
|
return await self._execute_openrouter(
|
||||||
|
prompt=prompt,
|
||||||
|
system=system,
|
||||||
|
force_json=force_json,
|
||||||
|
model_override=model_override,
|
||||||
|
json_schema=json_schema,
|
||||||
|
json_schema_name=json_schema_name,
|
||||||
|
strict_json_schema=strict_json_schema
|
||||||
|
)
|
||||||
|
|
||||||
if provider == "gemini" and self.google_client:
|
if provider == "gemini" and self.google_client:
|
||||||
return await self._execute_google(prompt, system, force_json, model_override)
|
return await self._execute_google(prompt, system, force_json, model_override)
|
||||||
|
|
@ -124,13 +188,15 @@ class LLMService:
|
||||||
# QUOTEN-SCHUTZ: Wenn Cloud (OpenRouter/Gemini) fehlschlägt,
|
# QUOTEN-SCHUTZ: Wenn Cloud (OpenRouter/Gemini) fehlschlägt,
|
||||||
# gehen wir IMMER zu Ollama, niemals von OpenRouter zu Gemini.
|
# gehen wir IMMER zu Ollama, niemals von OpenRouter zu Gemini.
|
||||||
if self.settings.LLM_FALLBACK_ENABLED and provider != "ollama":
|
if self.settings.LLM_FALLBACK_ENABLED and provider != "ollama":
|
||||||
logger.warning(f"🔄 Provider {provider} failed: {e}. Falling back to LOCAL OLLAMA to protect cloud quotas.")
|
logger.warning(
|
||||||
|
f"🔄 Provider {provider} failed: {e}. Falling back to LOCAL OLLAMA to protect cloud quotas."
|
||||||
|
)
|
||||||
return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
|
return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
async def _execute_google(self, prompt, system, force_json, model_override):
|
async def _execute_google(self, prompt, system, force_json, model_override):
|
||||||
"""Native Google SDK Integration (Gemini)."""
|
"""Native Google SDK Integration (Gemini)."""
|
||||||
# Nutzt GEMINI_MODEL aus config.py falls kein override (z.B. für Ingestion) übergeben wurde
|
# Nutzt GEMINI_MODEL aus config.py falls kein override übergeben wurde
|
||||||
model = model_override or self.settings.GEMINI_MODEL
|
model = model_override or self.settings.GEMINI_MODEL
|
||||||
config = types.GenerateContentConfig(
|
config = types.GenerateContentConfig(
|
||||||
system_instruction=system,
|
system_instruction=system,
|
||||||
|
|
@ -143,19 +209,52 @@ class LLMService:
|
||||||
)
|
)
|
||||||
return response.text.strip()
|
return response.text.strip()
|
||||||
|
|
||||||
async def _execute_openrouter(self, prompt, system, force_json, model_override):
|
async def _execute_openrouter(
|
||||||
"""OpenRouter API Integration (OpenAI-kompatibel)."""
|
self,
|
||||||
# Nutzt OPENROUTER_MODEL aus config.py (v0.6.2)
|
prompt: str,
|
||||||
|
system: Optional[str],
|
||||||
|
force_json: bool,
|
||||||
|
model_override: Optional[str],
|
||||||
|
# --- NEW (optional) ---
|
||||||
|
json_schema: Optional[Dict[str, Any]] = None,
|
||||||
|
json_schema_name: str = "mindnet_json",
|
||||||
|
strict_json_schema: bool = True
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
OpenRouter API Integration (OpenAI-kompatibel).
|
||||||
|
|
||||||
|
force_json=True:
|
||||||
|
- Ohne json_schema -> response_format={"type":"json_object"}
|
||||||
|
- Mit json_schema -> response_format={"type":"json_schema", "json_schema": {..., "strict": strict_json_schema}}
|
||||||
|
|
||||||
|
Wichtig: response_format NICHT als None senden (robuster gegenüber SDK/Provider).
|
||||||
|
"""
|
||||||
|
# Nutzt OPENROUTER_MODEL aus config.py
|
||||||
model = model_override or self.settings.OPENROUTER_MODEL
|
model = model_override or self.settings.OPENROUTER_MODEL
|
||||||
messages = []
|
messages = []
|
||||||
if system:
|
if system:
|
||||||
messages.append({"role": "system", "content": system})
|
messages.append({"role": "system", "content": system})
|
||||||
messages.append({"role": "user", "content": prompt})
|
messages.append({"role": "user", "content": prompt})
|
||||||
|
|
||||||
|
kwargs: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
if force_json:
|
||||||
|
if json_schema:
|
||||||
|
kwargs["response_format"] = {
|
||||||
|
"type": "json_schema",
|
||||||
|
"json_schema": {
|
||||||
|
"name": json_schema_name,
|
||||||
|
"strict": strict_json_schema,
|
||||||
|
"schema": json_schema
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
kwargs["response_format"] = {"type": "json_object"}
|
||||||
|
|
||||||
response = await self.openrouter_client.chat.completions.create(
|
response = await self.openrouter_client.chat.completions.create(
|
||||||
model=model,
|
model=model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
response_format={"type": "json_object"} if force_json else None
|
**kwargs
|
||||||
)
|
)
|
||||||
return response.choices[0].message.content.strip()
|
return response.choices[0].message.content.strip()
|
||||||
|
|
||||||
|
|
@ -170,8 +269,10 @@ class LLMService:
|
||||||
"num_ctx": 8192
|
"num_ctx": 8192
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if force_json: payload["format"] = "json"
|
if force_json:
|
||||||
if system: payload["system"] = system
|
payload["format"] = "json"
|
||||||
|
if system:
|
||||||
|
payload["system"] = system
|
||||||
|
|
||||||
attempt = 0
|
attempt = 0
|
||||||
while True:
|
while True:
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ FILE: app/services/semantic_analyzer.py
|
||||||
DESCRIPTION: KI-gestützte Kanten-Validierung. Nutzt LLM (Background-Priority), um Kanten präzise einem Chunk zuzuordnen.
|
DESCRIPTION: KI-gestützte Kanten-Validierung. Nutzt LLM (Background-Priority), um Kanten präzise einem Chunk zuzuordnen.
|
||||||
WP-20 Fix: Volle Kompatibilität mit der gehärteten LLMService (v3.3.2) Kaskade.
|
WP-20 Fix: Volle Kompatibilität mit der gehärteten LLMService (v3.3.2) Kaskade.
|
||||||
WP-22: Integration von valid_types zur Halluzinations-Vermeidung.
|
WP-22: Integration von valid_types zur Halluzinations-Vermeidung.
|
||||||
VERSION: 2.2.2
|
VERSION: 2.2.3
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: app.services.llm_service, app.services.edge_registry, json, logging
|
DEPENDENCIES: app.services.llm_service, app.services.edge_registry, json, logging
|
||||||
LAST_ANALYSIS: 2025-12-23
|
LAST_ANALYSIS: 2025-12-23
|
||||||
|
|
@ -29,6 +29,9 @@ class SemanticAnalyzer:
|
||||||
"""
|
"""
|
||||||
Prüft, ob ein String eine valide Kante im Format 'kind:target' ist.
|
Prüft, ob ein String eine valide Kante im Format 'kind:target' ist.
|
||||||
Verhindert, dass LLM-Geschwätz als Kante durchrutscht.
|
Verhindert, dass LLM-Geschwätz als Kante durchrutscht.
|
||||||
|
|
||||||
|
WP-22 Erweiterung:
|
||||||
|
- kind muss (wenn valid_types verfügbar) im kontrollierten Vokabular enthalten sein.
|
||||||
"""
|
"""
|
||||||
if not isinstance(edge_str, str) or ":" not in edge_str:
|
if not isinstance(edge_str, str) or ":" not in edge_str:
|
||||||
return False
|
return False
|
||||||
|
|
@ -49,6 +52,16 @@ class SemanticAnalyzer:
|
||||||
if not target:
|
if not target:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# WP-22: kontrolliertes Vokabular erzwingen (falls vorhanden/geladen)
|
||||||
|
try:
|
||||||
|
if hasattr(edge_registry, "valid_types") and edge_registry.valid_types:
|
||||||
|
if kind not in edge_registry.valid_types:
|
||||||
|
return False
|
||||||
|
except Exception:
|
||||||
|
# Bei Registry-Problemen lieber nicht crashen -> konservativ: ablehnen wäre auch möglich,
|
||||||
|
# aber wir bleiben kompatibel und robust.
|
||||||
|
pass
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
async def assign_edges_to_chunk(self, chunk_text: str, all_edges: List[str], note_type: str) -> List[str]:
|
async def assign_edges_to_chunk(self, chunk_text: str, all_edges: List[str], note_type: str) -> List[str]:
|
||||||
|
|
@ -94,6 +107,7 @@ class SemanticAnalyzer:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 4. LLM Call mit Traffic Control (Background Priority)
|
# 4. LLM Call mit Traffic Control (Background Priority)
|
||||||
|
# NOTE: Keine neuen Parameter hier, damit es mit deinem aktuellen llm_service.py kompatibel bleibt.
|
||||||
response_json = await self.llm.generate_raw_response(
|
response_json = await self.llm.generate_raw_response(
|
||||||
prompt=final_prompt,
|
prompt=final_prompt,
|
||||||
force_json=True,
|
force_json=True,
|
||||||
|
|
|
||||||
|
|
@ -184,13 +184,13 @@ edge_allocation_template:
|
||||||
ERLAUBTE TYPEN: {valid_types}
|
ERLAUBTE TYPEN: {valid_types}
|
||||||
TEXT: {chunk_text}
|
TEXT: {chunk_text}
|
||||||
KANDIDATEN: {edge_list}
|
KANDIDATEN: {edge_list}
|
||||||
OUTPUT: STRIKT eine flache JSON-Liste ["typ:ziel"]. Keine Objekte!
|
OUTPUT: STRIKT eine flache JSON-Liste ["typ:ziel"]. Kein Text davor/danach. Wenn nichts: []. Keine Objekte!
|
||||||
openrouter: |
|
openrouter: |
|
||||||
Filtere relevante Kanten.
|
Filtere relevante Kanten.
|
||||||
ERLAUBTE TYPEN: {valid_types}
|
ERLAUBTE TYPEN: {valid_types}
|
||||||
TEXT: {chunk_text}
|
TEXT: {chunk_text}
|
||||||
KANDIDATEN: {edge_list}
|
KANDIDATEN: {edge_list}
|
||||||
Output: JSON-Liste ["typ:ziel"].
|
OUTPUT: STRIKT JSON-Liste ["typ:ziel"]. Kein Text davor/danach. Wenn nichts: [].
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# 7. SMART EDGE ALLOCATION: Extraktion (Intent: INGEST)
|
# 7. SMART EDGE ALLOCATION: Extraktion (Intent: INGEST)
|
||||||
|
|
@ -221,9 +221,9 @@ edge_extraction:
|
||||||
Analysiere '{note_id}'. Extrahiere semantische Beziehungen.
|
Analysiere '{note_id}'. Extrahiere semantische Beziehungen.
|
||||||
ERLAUBTE TYPEN: {valid_types}
|
ERLAUBTE TYPEN: {valid_types}
|
||||||
TEXT: {text}
|
TEXT: {text}
|
||||||
OUTPUT: STRIKT JSON-Liste von Objekten: [[{{"to": "Ziel", "kind": "typ"}}]]. Keine Erklärungen!
|
OUTPUT: STRIKT JSON-Array von Objekten: [{{"to":"Ziel","kind":"typ"}}]. Kein Text davor/danach. Wenn nichts: [].
|
||||||
openrouter: |
|
openrouter: |
|
||||||
Wissensgraph-Extraktion für '{note_id}'.
|
Wissensgraph-Extraktion für '{note_id}'.
|
||||||
ERLAUBTE TYPEN: {valid_types}
|
ERLAUBTE TYPEN: {valid_types}
|
||||||
TEXT: {text}
|
TEXT: {text}
|
||||||
OUTPUT: STRIKT JSON-Liste von Objekten: [[{{"to": "X", "kind": "Y"}}]]. Keine Dictionaries mit Schlüsseln wie 'edges'!
|
OUTPUT: STRIKT JSON-Array von Objekten: [{{"to":"X","kind":"Y"}}]. Kein Text davor/danach. Wenn nichts: []. Keine Wrapper-Objekte (z.B. kein Top-Level-Key 'edges').
|
||||||
Loading…
Reference in New Issue
Block a user