Mit ChatGPT überarbeitetes Modell WP20
This commit is contained in:
parent
5278c75ac1
commit
4ab44e36a2
|
|
@ -4,7 +4,9 @@ DESCRIPTION: Hybrid-Client für Ollama, Google GenAI (Gemini) und OpenRouter.
|
||||||
Verwaltet provider-spezifische Prompts und Background-Last.
|
Verwaltet provider-spezifische Prompts und Background-Last.
|
||||||
WP-20: Optimiertes Fallback-Management zum Schutz von Cloud-Quoten.
|
WP-20: Optimiertes Fallback-Management zum Schutz von Cloud-Quoten.
|
||||||
WP-20 Fix: Bulletproof Prompt-Auflösung für format() Aufrufe.
|
WP-20 Fix: Bulletproof Prompt-Auflösung für format() Aufrufe.
|
||||||
VERSION: 3.3.2
|
WP-22/JSON: Optionales JSON-Schema + strict (für OpenRouter structured outputs),
|
||||||
|
OHNE Breaking Changes (neue Parameter nur am Ende).
|
||||||
|
VERSION: 3.3.3
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
"""
|
"""
|
||||||
import httpx
|
import httpx
|
||||||
|
|
@ -14,13 +16,14 @@ import asyncio
|
||||||
import json
|
import json
|
||||||
from google import genai
|
from google import genai
|
||||||
from google.genai import types
|
from google.genai import types
|
||||||
from openai import AsyncOpenAI # Für OpenRouter (OpenAI-kompatibel)
|
from openai import AsyncOpenAI # Für OpenRouter (OpenAI-kompatibel)
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, Dict, Any, Literal
|
from typing import Optional, Dict, Any, Literal
|
||||||
from app.config import get_settings
|
from app.config import get_settings
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class LLMService:
|
class LLMService:
|
||||||
# GLOBALER SEMAPHOR für Hintergrund-Last Steuerung (WP-06)
|
# GLOBALER SEMAPHOR für Hintergrund-Last Steuerung (WP-06)
|
||||||
_background_semaphore = None
|
_background_semaphore = None
|
||||||
|
|
@ -28,16 +31,16 @@ class LLMService:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.settings = get_settings()
|
self.settings = get_settings()
|
||||||
self.prompts = self._load_prompts()
|
self.prompts = self._load_prompts()
|
||||||
|
|
||||||
# Initialisiere Semaphore einmalig auf Klassen-Ebene
|
# Initialisiere Semaphore einmalig auf Klassen-Ebene
|
||||||
if LLMService._background_semaphore is None:
|
if LLMService._background_semaphore is None:
|
||||||
limit = getattr(self.settings, "BACKGROUND_LIMIT", 2)
|
limit = getattr(self.settings, "BACKGROUND_LIMIT", 2)
|
||||||
logger.info(f"🚦 LLMService: Initializing Background Semaphore with limit: {limit}")
|
logger.info(f"🚦 LLMService: Initializing Background Semaphore with limit: {limit}")
|
||||||
LLMService._background_semaphore = asyncio.Semaphore(limit)
|
LLMService._background_semaphore = asyncio.Semaphore(limit)
|
||||||
|
|
||||||
# 1. Lokaler Ollama Client
|
# 1. Lokaler Ollama Client
|
||||||
self.ollama_client = httpx.AsyncClient(
|
self.ollama_client = httpx.AsyncClient(
|
||||||
base_url=self.settings.OLLAMA_URL,
|
base_url=self.settings.OLLAMA_URL,
|
||||||
timeout=httpx.Timeout(self.settings.LLM_TIMEOUT)
|
timeout=httpx.Timeout(self.settings.LLM_TIMEOUT)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -74,63 +77,126 @@ class LLMService:
|
||||||
Hole provider-spezifisches Template mit intelligenter Text-Kaskade.
|
Hole provider-spezifisches Template mit intelligenter Text-Kaskade.
|
||||||
HINWEIS: Dies ist nur ein Text-Lookup und verbraucht kein API-Kontingent.
|
HINWEIS: Dies ist nur ein Text-Lookup und verbraucht kein API-Kontingent.
|
||||||
Kaskade: Gewählter Provider -> Gemini (Cloud-Stil) -> Ollama (Basis-Stil).
|
Kaskade: Gewählter Provider -> Gemini (Cloud-Stil) -> Ollama (Basis-Stil).
|
||||||
|
|
||||||
WP-20 Fix: Garantiert die Rückgabe eines Strings, um AttributeError zu vermeiden.
|
WP-20 Fix: Garantiert die Rückgabe eines Strings, um AttributeError zu vermeiden.
|
||||||
"""
|
"""
|
||||||
active_provider = provider or self.settings.MINDNET_LLM_PROVIDER
|
active_provider = provider or self.settings.MINDNET_LLM_PROVIDER
|
||||||
data = self.prompts.get(key, "")
|
data = self.prompts.get(key, "")
|
||||||
|
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
# Wir versuchen erst den Provider, dann Gemini (weil ähnlich leistungsfähig), dann Ollama
|
# Wir versuchen erst den Provider, dann Gemini (weil ähnlich leistungsfähig), dann Ollama
|
||||||
val = data.get(active_provider, data.get("gemini", data.get("ollama", "")))
|
val = data.get(active_provider, data.get("gemini", data.get("ollama", "")))
|
||||||
|
|
||||||
# Falls val durch YAML-Fehler immer noch ein Dict ist, extrahiere ersten String
|
# Falls val durch YAML-Fehler immer noch ein Dict ist, extrahiere ersten String
|
||||||
if isinstance(val, dict):
|
if isinstance(val, dict):
|
||||||
logger.warning(f"⚠️ [LLMService] Nested dictionary detected for key '{key}'. Using first entry.")
|
logger.warning(f"⚠️ [LLMService] Nested dictionary detected for key '{key}'. Using first entry.")
|
||||||
val = next(iter(val.values()), "") if val else ""
|
val = next(iter(val.values()), "") if val else ""
|
||||||
return str(val)
|
return str(val)
|
||||||
|
|
||||||
return str(data)
|
return str(data)
|
||||||
|
|
||||||
async def generate_raw_response(
|
async def generate_raw_response(
|
||||||
self, prompt: str, system: str = None, force_json: bool = False,
|
self,
|
||||||
max_retries: int = 2, base_delay: float = 2.0,
|
prompt: str,
|
||||||
|
system: str = None,
|
||||||
|
force_json: bool = False,
|
||||||
|
max_retries: int = 2,
|
||||||
|
base_delay: float = 2.0,
|
||||||
priority: Literal["realtime", "background"] = "realtime",
|
priority: Literal["realtime", "background"] = "realtime",
|
||||||
provider: Optional[str] = None,
|
provider: Optional[str] = None,
|
||||||
model_override: Optional[str] = None
|
model_override: Optional[str] = None,
|
||||||
|
# --- NEW (am Ende => rückwärtskompatibel!) ---
|
||||||
|
json_schema: Optional[Dict[str, Any]] = None,
|
||||||
|
json_schema_name: str = "mindnet_json",
|
||||||
|
strict_json_schema: bool = True
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Haupteinstiegspunkt für LLM-Anfragen mit Priorisierung."""
|
"""
|
||||||
|
Haupteinstiegspunkt für LLM-Anfragen mit Priorisierung.
|
||||||
|
|
||||||
|
force_json:
|
||||||
|
- Ollama: nutzt payload["format"]="json"
|
||||||
|
- Gemini: nutzt response_mime_type="application/json"
|
||||||
|
- OpenRouter: nutzt response_format=json_object (Fallback) oder json_schema (structured outputs)
|
||||||
|
|
||||||
|
json_schema + strict_json_schema (nur OpenRouter relevant):
|
||||||
|
- Wenn json_schema gesetzt ist UND force_json=True -> response_format.type="json_schema"
|
||||||
|
- strict_json_schema wird an OpenRouter/Provider weitergereicht (best effort je nach Provider)
|
||||||
|
"""
|
||||||
target_provider = provider or self.settings.MINDNET_LLM_PROVIDER
|
target_provider = provider or self.settings.MINDNET_LLM_PROVIDER
|
||||||
|
|
||||||
if priority == "background":
|
if priority == "background":
|
||||||
async with LLMService._background_semaphore:
|
async with LLMService._background_semaphore:
|
||||||
return await self._dispatch(target_provider, prompt, system, force_json, max_retries, base_delay, model_override)
|
return await self._dispatch(
|
||||||
|
target_provider,
|
||||||
return await self._dispatch(target_provider, prompt, system, force_json, max_retries, base_delay, model_override)
|
prompt,
|
||||||
|
system,
|
||||||
|
force_json,
|
||||||
|
max_retries,
|
||||||
|
base_delay,
|
||||||
|
model_override,
|
||||||
|
json_schema,
|
||||||
|
json_schema_name,
|
||||||
|
strict_json_schema
|
||||||
|
)
|
||||||
|
|
||||||
async def _dispatch(self, provider, prompt, system, force_json, max_retries, base_delay, model_override):
|
return await self._dispatch(
|
||||||
|
target_provider,
|
||||||
|
prompt,
|
||||||
|
system,
|
||||||
|
force_json,
|
||||||
|
max_retries,
|
||||||
|
base_delay,
|
||||||
|
model_override,
|
||||||
|
json_schema,
|
||||||
|
json_schema_name,
|
||||||
|
strict_json_schema
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _dispatch(
|
||||||
|
self,
|
||||||
|
provider: str,
|
||||||
|
prompt: str,
|
||||||
|
system: Optional[str],
|
||||||
|
force_json: bool,
|
||||||
|
max_retries: int,
|
||||||
|
base_delay: float,
|
||||||
|
model_override: Optional[str],
|
||||||
|
json_schema: Optional[Dict[str, Any]],
|
||||||
|
json_schema_name: str,
|
||||||
|
strict_json_schema: bool
|
||||||
|
) -> str:
|
||||||
"""Routet die Anfrage an den physikalischen API-Provider."""
|
"""Routet die Anfrage an den physikalischen API-Provider."""
|
||||||
try:
|
try:
|
||||||
if provider == "openrouter" and self.openrouter_client:
|
if provider == "openrouter" and self.openrouter_client:
|
||||||
return await self._execute_openrouter(prompt, system, force_json, model_override)
|
return await self._execute_openrouter(
|
||||||
|
prompt=prompt,
|
||||||
|
system=system,
|
||||||
|
force_json=force_json,
|
||||||
|
model_override=model_override,
|
||||||
|
json_schema=json_schema,
|
||||||
|
json_schema_name=json_schema_name,
|
||||||
|
strict_json_schema=strict_json_schema
|
||||||
|
)
|
||||||
|
|
||||||
if provider == "gemini" and self.google_client:
|
if provider == "gemini" and self.google_client:
|
||||||
return await self._execute_google(prompt, system, force_json, model_override)
|
return await self._execute_google(prompt, system, force_json, model_override)
|
||||||
|
|
||||||
# Default/Fallback zu Ollama
|
# Default/Fallback zu Ollama
|
||||||
return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
|
return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# QUOTEN-SCHUTZ: Wenn Cloud (OpenRouter/Gemini) fehlschlägt,
|
# QUOTEN-SCHUTZ: Wenn Cloud (OpenRouter/Gemini) fehlschlägt,
|
||||||
# gehen wir IMMER zu Ollama, niemals von OpenRouter zu Gemini.
|
# gehen wir IMMER zu Ollama, niemals von OpenRouter zu Gemini.
|
||||||
if self.settings.LLM_FALLBACK_ENABLED and provider != "ollama":
|
if self.settings.LLM_FALLBACK_ENABLED and provider != "ollama":
|
||||||
logger.warning(f"🔄 Provider {provider} failed: {e}. Falling back to LOCAL OLLAMA to protect cloud quotas.")
|
logger.warning(
|
||||||
|
f"🔄 Provider {provider} failed: {e}. Falling back to LOCAL OLLAMA to protect cloud quotas."
|
||||||
|
)
|
||||||
return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
|
return await self._execute_ollama(prompt, system, force_json, max_retries, base_delay)
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
async def _execute_google(self, prompt, system, force_json, model_override):
|
async def _execute_google(self, prompt, system, force_json, model_override):
|
||||||
"""Native Google SDK Integration (Gemini)."""
|
"""Native Google SDK Integration (Gemini)."""
|
||||||
# Nutzt GEMINI_MODEL aus config.py falls kein override (z.B. für Ingestion) übergeben wurde
|
# Nutzt GEMINI_MODEL aus config.py falls kein override übergeben wurde
|
||||||
model = model_override or self.settings.GEMINI_MODEL
|
model = model_override or self.settings.GEMINI_MODEL
|
||||||
config = types.GenerateContentConfig(
|
config = types.GenerateContentConfig(
|
||||||
system_instruction=system,
|
system_instruction=system,
|
||||||
|
|
@ -143,19 +209,52 @@ class LLMService:
|
||||||
)
|
)
|
||||||
return response.text.strip()
|
return response.text.strip()
|
||||||
|
|
||||||
async def _execute_openrouter(self, prompt, system, force_json, model_override):
|
async def _execute_openrouter(
|
||||||
"""OpenRouter API Integration (OpenAI-kompatibel)."""
|
self,
|
||||||
# Nutzt OPENROUTER_MODEL aus config.py (v0.6.2)
|
prompt: str,
|
||||||
|
system: Optional[str],
|
||||||
|
force_json: bool,
|
||||||
|
model_override: Optional[str],
|
||||||
|
# --- NEW (optional) ---
|
||||||
|
json_schema: Optional[Dict[str, Any]] = None,
|
||||||
|
json_schema_name: str = "mindnet_json",
|
||||||
|
strict_json_schema: bool = True
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
OpenRouter API Integration (OpenAI-kompatibel).
|
||||||
|
|
||||||
|
force_json=True:
|
||||||
|
- Ohne json_schema -> response_format={"type":"json_object"}
|
||||||
|
- Mit json_schema -> response_format={"type":"json_schema", "json_schema": {..., "strict": True}}
|
||||||
|
|
||||||
|
Wichtig: response_format NICHT als None senden (robuster gegenüber SDK/Provider).
|
||||||
|
"""
|
||||||
|
# Nutzt OPENROUTER_MODEL aus config.py
|
||||||
model = model_override or self.settings.OPENROUTER_MODEL
|
model = model_override or self.settings.OPENROUTER_MODEL
|
||||||
messages = []
|
messages = []
|
||||||
if system:
|
if system:
|
||||||
messages.append({"role": "system", "content": system})
|
messages.append({"role": "system", "content": system})
|
||||||
messages.append({"role": "user", "content": prompt})
|
messages.append({"role": "user", "content": prompt})
|
||||||
|
|
||||||
|
kwargs: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
if force_json:
|
||||||
|
if json_schema:
|
||||||
|
kwargs["response_format"] = {
|
||||||
|
"type": "json_schema",
|
||||||
|
"json_schema": {
|
||||||
|
"name": json_schema_name,
|
||||||
|
"strict": strict_json_schema,
|
||||||
|
"schema": json_schema
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
kwargs["response_format"] = {"type": "json_object"}
|
||||||
|
|
||||||
response = await self.openrouter_client.chat.completions.create(
|
response = await self.openrouter_client.chat.completions.create(
|
||||||
model=model,
|
model=model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
response_format={"type": "json_object"} if force_json else None
|
**kwargs
|
||||||
)
|
)
|
||||||
return response.choices[0].message.content.strip()
|
return response.choices[0].message.content.strip()
|
||||||
|
|
||||||
|
|
@ -167,11 +266,13 @@ class LLMService:
|
||||||
"stream": False,
|
"stream": False,
|
||||||
"options": {
|
"options": {
|
||||||
"temperature": 0.1 if force_json else 0.7,
|
"temperature": 0.1 if force_json else 0.7,
|
||||||
"num_ctx": 8192
|
"num_ctx": 8192
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if force_json: payload["format"] = "json"
|
if force_json:
|
||||||
if system: payload["system"] = system
|
payload["format"] = "json"
|
||||||
|
if system:
|
||||||
|
payload["system"] = system
|
||||||
|
|
||||||
attempt = 0
|
attempt = 0
|
||||||
while True:
|
while True:
|
||||||
|
|
@ -193,16 +294,16 @@ class LLMService:
|
||||||
provider = self.settings.MINDNET_LLM_PROVIDER
|
provider = self.settings.MINDNET_LLM_PROVIDER
|
||||||
system_prompt = self.get_prompt("system_prompt", provider)
|
system_prompt = self.get_prompt("system_prompt", provider)
|
||||||
rag_template = self.get_prompt("rag_template", provider)
|
rag_template = self.get_prompt("rag_template", provider)
|
||||||
|
|
||||||
final_prompt = rag_template.format(context_str=context_str, query=query)
|
final_prompt = rag_template.format(context_str=context_str, query=query)
|
||||||
|
|
||||||
return await self.generate_raw_response(
|
return await self.generate_raw_response(
|
||||||
final_prompt,
|
final_prompt,
|
||||||
system=system_prompt,
|
system=system_prompt,
|
||||||
priority="realtime"
|
priority="realtime"
|
||||||
)
|
)
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
"""Schließt die HTTP-Verbindungen."""
|
"""Schließt die HTTP-Verbindungen."""
|
||||||
if self.ollama_client:
|
if self.ollama_client:
|
||||||
await self.ollama_client.aclose()
|
await self.ollama_client.aclose()
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ FILE: app/services/semantic_analyzer.py
|
||||||
DESCRIPTION: KI-gestützte Kanten-Validierung. Nutzt LLM (Background-Priority), um Kanten präzise einem Chunk zuzuordnen.
|
DESCRIPTION: KI-gestützte Kanten-Validierung. Nutzt LLM (Background-Priority), um Kanten präzise einem Chunk zuzuordnen.
|
||||||
WP-20 Fix: Volle Kompatibilität mit der gehärteten LLMService (v3.3.2) Kaskade.
|
WP-20 Fix: Volle Kompatibilität mit der gehärteten LLMService (v3.3.2) Kaskade.
|
||||||
WP-22: Integration von valid_types zur Halluzinations-Vermeidung.
|
WP-22: Integration von valid_types zur Halluzinations-Vermeidung.
|
||||||
VERSION: 2.2.2
|
VERSION: 2.2.3
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: app.services.llm_service, app.services.edge_registry, json, logging
|
DEPENDENCIES: app.services.llm_service, app.services.edge_registry, json, logging
|
||||||
LAST_ANALYSIS: 2025-12-23
|
LAST_ANALYSIS: 2025-12-23
|
||||||
|
|
@ -29,26 +29,39 @@ class SemanticAnalyzer:
|
||||||
"""
|
"""
|
||||||
Prüft, ob ein String eine valide Kante im Format 'kind:target' ist.
|
Prüft, ob ein String eine valide Kante im Format 'kind:target' ist.
|
||||||
Verhindert, dass LLM-Geschwätz als Kante durchrutscht.
|
Verhindert, dass LLM-Geschwätz als Kante durchrutscht.
|
||||||
|
|
||||||
|
WP-22 Erweiterung:
|
||||||
|
- kind muss (wenn valid_types verfügbar) im kontrollierten Vokabular enthalten sein.
|
||||||
"""
|
"""
|
||||||
if not isinstance(edge_str, str) or ":" not in edge_str:
|
if not isinstance(edge_str, str) or ":" not in edge_str:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
parts = edge_str.split(":", 1)
|
parts = edge_str.split(":", 1)
|
||||||
kind = parts[0].strip()
|
kind = parts[0].strip()
|
||||||
target = parts[1].strip()
|
target = parts[1].strip()
|
||||||
|
|
||||||
# Regel 1: Ein 'kind' (Beziehungstyp) darf keine Leerzeichen enthalten.
|
# Regel 1: Ein 'kind' (Beziehungstyp) darf keine Leerzeichen enthalten.
|
||||||
if " " in kind:
|
if " " in kind:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Regel 2: Plausible Länge für den Typ
|
# Regel 2: Plausible Länge für den Typ
|
||||||
if len(kind) > 40 or len(kind) < 2:
|
if len(kind) > 40 or len(kind) < 2:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Regel 3: Target darf nicht leer sein
|
# Regel 3: Target darf nicht leer sein
|
||||||
if not target:
|
if not target:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# WP-22: kontrolliertes Vokabular erzwingen (falls vorhanden/geladen)
|
||||||
|
try:
|
||||||
|
if hasattr(edge_registry, "valid_types") and edge_registry.valid_types:
|
||||||
|
if kind not in edge_registry.valid_types:
|
||||||
|
return False
|
||||||
|
except Exception:
|
||||||
|
# Bei Registry-Problemen lieber nicht crashen -> konservativ: ablehnen wäre auch möglich,
|
||||||
|
# aber wir bleiben kompatibel und robust.
|
||||||
|
pass
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
async def assign_edges_to_chunk(self, chunk_text: str, all_edges: List[str], note_type: str) -> List[str]:
|
async def assign_edges_to_chunk(self, chunk_text: str, all_edges: List[str], note_type: str) -> List[str]:
|
||||||
|
|
@ -61,7 +74,7 @@ class SemanticAnalyzer:
|
||||||
|
|
||||||
# 1. Prompt laden via get_prompt (handelt die Provider-Kaskade automatisch ab)
|
# 1. Prompt laden via get_prompt (handelt die Provider-Kaskade automatisch ab)
|
||||||
prompt_template = self.llm.get_prompt("edge_allocation_template")
|
prompt_template = self.llm.get_prompt("edge_allocation_template")
|
||||||
|
|
||||||
# Sicherheits-Check für die Format-Methode
|
# Sicherheits-Check für die Format-Methode
|
||||||
if not prompt_template or isinstance(prompt_template, dict):
|
if not prompt_template or isinstance(prompt_template, dict):
|
||||||
logger.warning("⚠️ [SemanticAnalyzer] Prompt 'edge_allocation_template' konnte nicht als String geladen werden. Nutze Not-Fallback.")
|
logger.warning("⚠️ [SemanticAnalyzer] Prompt 'edge_allocation_template' konnte nicht als String geladen werden. Nutze Not-Fallback.")
|
||||||
|
|
@ -77,14 +90,14 @@ class SemanticAnalyzer:
|
||||||
edge_registry.ensure_latest()
|
edge_registry.ensure_latest()
|
||||||
valid_types_str = ", ".join(sorted(list(edge_registry.valid_types)))
|
valid_types_str = ", ".join(sorted(list(edge_registry.valid_types)))
|
||||||
edges_str = "\n".join([f"- {e}" for e in all_edges])
|
edges_str = "\n".join([f"- {e}" for e in all_edges])
|
||||||
|
|
||||||
# LOG: Request Info
|
# LOG: Request Info
|
||||||
logger.debug(f"🔍 [SemanticAnalyzer] Request: {len(chunk_text)} chars Text, {len(all_edges)} Candidates.")
|
logger.debug(f"🔍 [SemanticAnalyzer] Request: {len(chunk_text)} chars Text, {len(all_edges)} Candidates.")
|
||||||
|
|
||||||
# 3. Prompt füllen (FIX: valid_types hinzugefügt, um FormatError zu beheben)
|
# 3. Prompt füllen (FIX: valid_types hinzugefügt, um FormatError zu beheben)
|
||||||
try:
|
try:
|
||||||
final_prompt = prompt_template.format(
|
final_prompt = prompt_template.format(
|
||||||
chunk_text=chunk_text[:3500],
|
chunk_text=chunk_text[:3500],
|
||||||
edge_list=edges_str,
|
edge_list=edges_str,
|
||||||
valid_types=valid_types_str
|
valid_types=valid_types_str
|
||||||
)
|
)
|
||||||
|
|
@ -94,10 +107,11 @@ class SemanticAnalyzer:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 4. LLM Call mit Traffic Control (Background Priority)
|
# 4. LLM Call mit Traffic Control (Background Priority)
|
||||||
|
# NOTE: Keine neuen Parameter hier, damit es mit deinem aktuellen llm_service.py kompatibel bleibt.
|
||||||
response_json = await self.llm.generate_raw_response(
|
response_json = await self.llm.generate_raw_response(
|
||||||
prompt=final_prompt,
|
prompt=final_prompt,
|
||||||
force_json=True,
|
force_json=True,
|
||||||
max_retries=5,
|
max_retries=5,
|
||||||
base_delay=5.0,
|
base_delay=5.0,
|
||||||
priority="background"
|
priority="background"
|
||||||
)
|
)
|
||||||
|
|
@ -107,8 +121,8 @@ class SemanticAnalyzer:
|
||||||
|
|
||||||
# 5. Parsing & Cleaning
|
# 5. Parsing & Cleaning
|
||||||
clean_json = response_json.replace("```json", "").replace("```", "").strip()
|
clean_json = response_json.replace("```json", "").replace("```", "").strip()
|
||||||
|
|
||||||
if not clean_json:
|
if not clean_json:
|
||||||
logger.warning("⚠️ [SemanticAnalyzer] Leere Antwort vom LLM erhalten.")
|
logger.warning("⚠️ [SemanticAnalyzer] Leere Antwort vom LLM erhalten.")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
@ -122,15 +136,15 @@ class SemanticAnalyzer:
|
||||||
|
|
||||||
# 6. Robuste Validierung (List vs Dict)
|
# 6. Robuste Validierung (List vs Dict)
|
||||||
raw_candidates = []
|
raw_candidates = []
|
||||||
|
|
||||||
if isinstance(data, list):
|
if isinstance(data, list):
|
||||||
raw_candidates = data
|
raw_candidates = data
|
||||||
|
|
||||||
elif isinstance(data, dict):
|
elif isinstance(data, dict):
|
||||||
logger.info(f"ℹ️ [SemanticAnalyzer] LLM lieferte Dict statt Liste. Versuche Reparatur.")
|
logger.info(f"ℹ️ [SemanticAnalyzer] LLM lieferte Dict statt Liste. Versuche Reparatur.")
|
||||||
for key, val in data.items():
|
for key, val in data.items():
|
||||||
if key.lower() in ["edges", "results", "kanten", "matches"] and isinstance(val, list):
|
if key.lower() in ["edges", "results", "kanten", "matches"] and isinstance(val, list):
|
||||||
raw_candidates.extend(val)
|
raw_candidates.extend(val)
|
||||||
elif isinstance(val, str):
|
elif isinstance(val, str):
|
||||||
raw_candidates.append(f"{key}:{val}")
|
raw_candidates.append(f"{key}:{val}")
|
||||||
elif isinstance(val, list):
|
elif isinstance(val, list):
|
||||||
|
|
@ -147,7 +161,7 @@ class SemanticAnalyzer:
|
||||||
logger.debug(f" [SemanticAnalyzer] Invalid edge format rejected: '{e_str}'")
|
logger.debug(f" [SemanticAnalyzer] Invalid edge format rejected: '{e_str}'")
|
||||||
|
|
||||||
final_result = [e for e in valid_edges if ":" in e]
|
final_result = [e for e in valid_edges if ":" in e]
|
||||||
|
|
||||||
if final_result:
|
if final_result:
|
||||||
logger.info(f"✅ [SemanticAnalyzer] Success. {len(final_result)} Kanten zugewiesen.")
|
logger.info(f"✅ [SemanticAnalyzer] Success. {len(final_result)} Kanten zugewiesen.")
|
||||||
else:
|
else:
|
||||||
|
|
@ -169,4 +183,4 @@ def get_semantic_analyzer():
|
||||||
global _analyzer_instance
|
global _analyzer_instance
|
||||||
if _analyzer_instance is None:
|
if _analyzer_instance is None:
|
||||||
_analyzer_instance = SemanticAnalyzer()
|
_analyzer_instance = SemanticAnalyzer()
|
||||||
return _analyzer_instance
|
return _analyzer_instance
|
||||||
|
|
|
||||||
|
|
@ -184,13 +184,13 @@ edge_allocation_template:
|
||||||
ERLAUBTE TYPEN: {valid_types}
|
ERLAUBTE TYPEN: {valid_types}
|
||||||
TEXT: {chunk_text}
|
TEXT: {chunk_text}
|
||||||
KANDIDATEN: {edge_list}
|
KANDIDATEN: {edge_list}
|
||||||
OUTPUT: STRIKT eine flache JSON-Liste ["typ:ziel"]. Keine Objekte!
|
OUTPUT: STRIKT eine flache JSON-Liste ["typ:ziel"]. Kein Text davor/danach. Wenn nichts: []. Keine Objekte!
|
||||||
openrouter: |
|
openrouter: |
|
||||||
Filtere relevante Kanten.
|
Filtere relevante Kanten.
|
||||||
ERLAUBTE TYPEN: {valid_types}
|
ERLAUBTE TYPEN: {valid_types}
|
||||||
TEXT: {chunk_text}
|
TEXT: {chunk_text}
|
||||||
KANDIDATEN: {edge_list}
|
KANDIDATEN: {edge_list}
|
||||||
Output: JSON-Liste ["typ:ziel"].
|
OUTPUT: STRIKT JSON-Liste ["typ:ziel"]. Kein Text davor/danach. Wenn nichts: [].
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# 7. SMART EDGE ALLOCATION: Extraktion (Intent: INGEST)
|
# 7. SMART EDGE ALLOCATION: Extraktion (Intent: INGEST)
|
||||||
|
|
@ -221,9 +221,9 @@ edge_extraction:
|
||||||
Analysiere '{note_id}'. Extrahiere semantische Beziehungen.
|
Analysiere '{note_id}'. Extrahiere semantische Beziehungen.
|
||||||
ERLAUBTE TYPEN: {valid_types}
|
ERLAUBTE TYPEN: {valid_types}
|
||||||
TEXT: {text}
|
TEXT: {text}
|
||||||
OUTPUT: STRIKT JSON-Liste von Objekten: [[{{"to": "Ziel", "kind": "typ"}}]]. Keine Erklärungen!
|
OUTPUT: STRIKT JSON-Array von Objekten: [{{"to":"Ziel","kind":"typ"}}]. Kein Text davor/danach. Wenn nichts: [].
|
||||||
openrouter: |
|
openrouter: |
|
||||||
Wissensgraph-Extraktion für '{note_id}'.
|
Wissensgraph-Extraktion für '{note_id}'.
|
||||||
ERLAUBTE TYPEN: {valid_types}
|
ERLAUBTE TYPEN: {valid_types}
|
||||||
TEXT: {text}
|
TEXT: {text}
|
||||||
OUTPUT: STRIKT JSON-Liste von Objekten: [[{{"to": "X", "kind": "Y"}}]]. Keine Dictionaries mit Schlüsseln wie 'edges'!
|
OUTPUT: STRIKT JSON-Array von Objekten: [{{"to":"X","kind":"Y"}}]. Kein Text davor/danach. Wenn nichts: []. Keine Wrapper-Objekte (z.B. kein Top-Level-Key 'edges').
|
||||||
Loading…
Reference in New Issue
Block a user