Bug fix: resolve circular import by moving load_type_registry and clean_llm_text into app.core.registry
This commit is contained in:
parent
e045371969
commit
37ec8b614e
|
|
@ -1,46 +1,25 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/core/ingestion/ingestion_utils.py
|
FILE: app/core/ingestion/ingestion_utils.py
|
||||||
DESCRIPTION: Hilfswerkzeuge für JSON-Recovery, Typ-Registry und Konfigurations-Lookups.
|
DESCRIPTION: Hilfswerkzeuge für JSON-Recovery, Typ-Registry und Konfigurations-Lookups.
|
||||||
AUDIT v2.13.8: Zentralisierung der Text-Bereinigung für LLM-Antworten.
|
AUDIT v2.13.9: Behebung des Circular Imports durch Nutzung der app.core.registry.
|
||||||
"""
|
"""
|
||||||
import os
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import yaml
|
from typing import Any, Optional, Dict
|
||||||
from typing import Any, Optional, Dict, List
|
|
||||||
|
|
||||||
def clean_llm_text(text: str, registry: Optional[dict] = None) -> str:
|
# ENTSCHEIDENDER FIX: Import der Basis-Logik aus dem neutralen Registry-Modul.
|
||||||
"""
|
# Dies bricht den Zirkelbezug auf, da dieses Modul keine Services mehr importiert.
|
||||||
Entfernt LLM-Steuerzeichen und Artefakte aus einem Text.
|
from app.core.registry import load_type_registry, clean_llm_text
|
||||||
Nutzt die cleanup_patterns aus der Registry oder Standardwerte.
|
|
||||||
"""
|
|
||||||
if not text or not isinstance(text, str):
|
|
||||||
return ""
|
|
||||||
|
|
||||||
# Fallback-Patterns, falls die Registry nicht greift
|
|
||||||
default_patterns = ["<s>", "</s>", "[OUT]", "[/OUT]"]
|
|
||||||
|
|
||||||
# Falls keine Registry übergeben wurde, versuchen wir sie zu laden
|
|
||||||
reg = registry or load_type_registry()
|
|
||||||
|
|
||||||
# Lade Patterns aus llm_settings (WP-14 Erweiterung)
|
|
||||||
patterns: List[str] = reg.get("llm_settings", {}).get("cleanup_patterns", default_patterns)
|
|
||||||
|
|
||||||
clean = text
|
|
||||||
for p in patterns:
|
|
||||||
clean = clean.replace(p, "")
|
|
||||||
|
|
||||||
return clean.strip()
|
|
||||||
|
|
||||||
def extract_json_from_response(text: str, registry: Optional[dict] = None) -> Any:
|
def extract_json_from_response(text: str, registry: Optional[dict] = None) -> Any:
|
||||||
"""
|
"""
|
||||||
Extrahiert JSON-Daten und bereinigt LLM-Steuerzeichen.
|
Extrahiert JSON-Daten und bereinigt LLM-Steuerzeichen.
|
||||||
WP-14: Nutzt nun die zentrale clean_llm_text Funktion.
|
WP-14: Nutzt nun die zentrale clean_llm_text Funktion aus app.core.registry.
|
||||||
"""
|
"""
|
||||||
if not text:
|
if not text:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# 1. Text zentral bereinigen
|
# 1. Text zentral bereinigen via neutralem Modul
|
||||||
clean = clean_llm_text(text, registry)
|
clean = clean_llm_text(text, registry)
|
||||||
|
|
||||||
# 2. Markdown-Code-Blöcke extrahieren
|
# 2. Markdown-Code-Blöcke extrahieren
|
||||||
|
|
@ -65,16 +44,6 @@ def extract_json_from_response(text: str, registry: Optional[dict] = None) -> An
|
||||||
except: pass
|
except: pass
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def load_type_registry(custom_path: Optional[str] = None) -> dict:
|
|
||||||
"""Lädt die types.yaml zur Steuerung der typ-spezifischen Ingestion."""
|
|
||||||
from app.config import get_settings
|
|
||||||
settings = get_settings()
|
|
||||||
path = custom_path or settings.MINDNET_TYPES_FILE
|
|
||||||
if not os.path.exists(path): return {}
|
|
||||||
try:
|
|
||||||
with open(path, "r", encoding="utf-8") as f: return yaml.safe_load(f) or {}
|
|
||||||
except Exception: return {}
|
|
||||||
|
|
||||||
def resolve_note_type(registry: dict, requested: Optional[str]) -> str:
|
def resolve_note_type(registry: dict, requested: Optional[str]) -> str:
|
||||||
"""
|
"""
|
||||||
Bestimmt den finalen Notiz-Typ.
|
Bestimmt den finalen Notiz-Typ.
|
||||||
|
|
@ -89,7 +58,9 @@ def resolve_note_type(registry: dict, requested: Optional[str]) -> str:
|
||||||
return ingest_cfg.get("default_note_type", "concept")
|
return ingest_cfg.get("default_note_type", "concept")
|
||||||
|
|
||||||
def get_chunk_config_by_profile(registry: dict, profile_name: str, note_type: str) -> Dict[str, Any]:
|
def get_chunk_config_by_profile(registry: dict, profile_name: str, note_type: str) -> Dict[str, Any]:
|
||||||
"""Holt die Chunker-Parameter für ein spezifisches Profil aus der Registry."""
|
"""
|
||||||
|
Holt die Chunker-Parameter für ein spezifisches Profil aus der Registry.
|
||||||
|
"""
|
||||||
from app.core.chunking import get_chunk_config
|
from app.core.chunking import get_chunk_config
|
||||||
profiles = registry.get("chunking_profiles", {})
|
profiles = registry.get("chunking_profiles", {})
|
||||||
if profile_name in profiles:
|
if profile_name in profiles:
|
||||||
|
|
|
||||||
43
app/core/registry.py
Normal file
43
app/core/registry.py
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
"""
|
||||||
|
FILE: app/core/registry.py
|
||||||
|
DESCRIPTION: Zentraler Base-Layer für Konfigurations-Loading und Text-Bereinigung.
|
||||||
|
Bricht Zirkelbezüge zwischen Ingestion und LLMService auf.
|
||||||
|
VERSION: 1.0.0
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import yaml
|
||||||
|
from typing import Optional, List
|
||||||
|
|
||||||
|
def load_type_registry(custom_path: Optional[str] = None) -> dict:
    """Load the types.yaml registry that drives type-specific logic.

    Loading is best-effort: every failure mode yields an empty dict instead
    of raising, so callers can always use ``.get`` on the result.

    Args:
        custom_path: Explicit path to the YAML file. When omitted or empty,
            the path is read from ``get_settings().MINDNET_TYPES_FILE``.

    Returns:
        The parsed top-level YAML mapping, or ``{}`` when no path is
        configured, the file does not exist, cannot be read or parsed, or
        its top-level value is not a mapping.
    """
    import logging

    path = custom_path
    if not path:
        # Settings are imported inside the function (not at module level) to
        # avoid import cycles, and only when no explicit path was supplied.
        from app.config import get_settings

        path = get_settings().MINDNET_TYPES_FILE

    # Guard against an unset/empty configured path before touching the disk.
    if not path or not os.path.exists(path):
        return {}

    try:
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except (OSError, ValueError, yaml.YAMLError) as exc:
        # Still best-effort, but leave a trace instead of failing silently.
        # ValueError covers UnicodeDecodeError for non-UTF-8 files.
        logging.getLogger(__name__).warning(
            "Could not load type registry from %s: %s", path, exc
        )
        return {}

    # An empty file parses to None and a list/scalar document is not a
    # registry; both are normalised to {} to honour the declared return type.
    return data if isinstance(data, dict) else {}
|
||||||
|
|
||||||
|
def clean_llm_text(text: str, registry: Optional[dict] = None) -> str:
    """Strip LLM control tokens (``<s>``, ``[OUT]`` etc.) from a text.

    Used for both JSON parsing and chat answers.

    Args:
        text: Raw model output. Anything falsy or not a ``str`` yields ``""``.
        registry: Already-loaded type registry. When omitted or empty, the
            registry is loaded via ``load_type_registry()`` on every call, so
            callers on hot paths should pass one in.

    Returns:
        ``text`` with every configured cleanup pattern removed and
        surrounding whitespace stripped.
    """
    if not text or not isinstance(text, str):
        return ""

    default_patterns = ["<s>", "</s>", "[OUT]", "[/OUT]"]
    reg = registry or load_type_registry()

    # Read patterns from llm_settings (WP-14). YAML may legally hold ``null``
    # or a scalar at either level, so validate types instead of chaining .get().
    llm_settings = reg.get("llm_settings") if isinstance(reg, dict) else None
    configured = (
        llm_settings.get("cleanup_patterns")
        if isinstance(llm_settings, dict)
        else None
    )
    if isinstance(configured, str):
        # A single scalar pattern must not be iterated character by character.
        configured = [configured]
    patterns = configured if isinstance(configured, (list, tuple)) else default_patterns

    clean = text
    for pattern in patterns:
        # Skip non-string or empty entries rather than failing in str.replace.
        if isinstance(pattern, str) and pattern:
            clean = clean.replace(pattern, "")

    return clean.strip()
|
||||||
|
|
@ -6,11 +6,11 @@ DESCRIPTION: Hybrid-Client für Ollama, Google GenAI (Gemini) und OpenRouter.
|
||||||
WP-20 Fix: Bulletproof Prompt-Auflösung für format() Aufrufe.
|
WP-20 Fix: Bulletproof Prompt-Auflösung für format() Aufrufe.
|
||||||
WP-22/JSON: Optionales JSON-Schema + strict (für OpenRouter structured outputs).
|
WP-22/JSON: Optionales JSON-Schema + strict (für OpenRouter structured outputs).
|
||||||
FIX: Intelligente Rate-Limit Erkennung (429 Handling), v1-API Sync & Timeouts.
|
FIX: Intelligente Rate-Limit Erkennung (429 Handling), v1-API Sync & Timeouts.
|
||||||
VERSION: 3.3.8
|
VERSION: 3.3.9
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
FIX:
|
FIX:
|
||||||
- Integriert clean_llm_text zur Entfernung von Steuerzeichen (<s>, [OUT] etc.) in Antworten.
|
- Importiert clean_llm_text von app.core.registry zur Vermeidung von Circular Imports.
|
||||||
- Stellt sicher, dass Chat-Antworten sauber formatiert ausgegeben werden.
|
- Wendet clean_llm_text auf Text-Antworten in generate_raw_response an.
|
||||||
"""
|
"""
|
||||||
import httpx
|
import httpx
|
||||||
import yaml
|
import yaml
|
||||||
|
|
@ -24,8 +24,8 @@ from pathlib import Path
|
||||||
from typing import Optional, Dict, Any, Literal
|
from typing import Optional, Dict, Any, Literal
|
||||||
from app.config import get_settings
|
from app.config import get_settings
|
||||||
|
|
||||||
# Import der zentralen Bereinigungs-Logik (WP-14 Fix)
|
# ENTSCHEIDENDER FIX: Import der neutralen Bereinigungs-Logik (WP-14)
|
||||||
from app.core.ingestion.ingestion_utils import clean_llm_text
|
from app.core.registry import clean_llm_text
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -140,6 +140,7 @@ class LLMService:
|
||||||
max_retries, base_delay, model_override,
|
max_retries, base_delay, model_override,
|
||||||
json_schema, json_schema_name, strict_json_schema
|
json_schema, json_schema_name, strict_json_schema
|
||||||
)
|
)
|
||||||
|
# WP-14 Fix: Bereinige Text-Antworten vor Rückgabe
|
||||||
return clean_llm_text(res) if not force_json else res
|
return clean_llm_text(res) if not force_json else res
|
||||||
|
|
||||||
async def _dispatch(
|
async def _dispatch(
|
||||||
|
|
@ -212,7 +213,6 @@ class LLMService:
|
||||||
|
|
||||||
config = types.GenerateContentConfig(
|
config = types.GenerateContentConfig(
|
||||||
system_instruction=system,
|
system_instruction=system,
|
||||||
# WICHTIG: Gemini 1.5+ unterstützt response_mime_type nativ
|
|
||||||
response_mime_type="application/json" if force_json else "text/plain"
|
response_mime_type="application/json" if force_json else "text/plain"
|
||||||
)
|
)
|
||||||
response = await asyncio.wait_for(
|
response = await asyncio.wait_for(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user