"""
FILE: app/core/ingestion/ingestion_utils.py
DESCRIPTION: Helper utilities for JSON recovery, the type registry, and
             configuration lookups.
"""
import os
import json
import re
import yaml
from typing import Any, Optional, Dict

# Control tokens that are stripped from an LLM response before JSON parsing.
# NOTE(review): the first two entries were empty strings in the previous
# revision (which made the corresponding replace() calls no-ops); "<s>" and
# "</s>" are the assumed original sentinels -- confirm against the model's
# actual output format.
_LLM_CONTROL_TOKENS = ("<s>", "</s>", "[OUT]", "[/OUT]")

# Matches the first fenced Markdown block, with or without a "json" tag.
_FENCED_BLOCK_RE = re.compile(r"```(?:json)?\s*(.*?)\s*```", re.DOTALL)

# Sentinel for "nothing could be parsed"; needed because None is a valid
# JSON value (null) and therefore cannot signal failure.
_NO_MATCH = object()


def _parse_enclosed(payload: str, opener: str, closer: str) -> Any:
    """Parse the span from the first *opener* to the last *closer* as JSON.

    Returns the decoded value, or ``_NO_MATCH`` when the delimiters are
    missing, out of order, or the enclosed text is not valid JSON.
    """
    start = payload.find(opener)
    end = payload.rfind(closer) + 1  # rfind() == -1 yields 0, rejected below
    if start == -1 or end <= start:
        return _NO_MATCH
    try:
        return json.loads(payload[start:end])
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError subclass; RecursionError covers
        # pathologically nested input. Anything else is allowed to propagate.
        return _NO_MATCH


def extract_json_from_response(text: str) -> Any:
    """Extract JSON data from a raw LLM response.

    The response is cleaned of the control tokens listed in
    ``_LLM_CONTROL_TOKENS``; if it contains a fenced Markdown block, only the
    content of the first such block is considered. The remaining payload is
    parsed as JSON. If that fails, two recovery attempts are made, in this
    order: the span from the first ``[`` to the last ``]``, then the span
    from the first ``{`` to the last ``}``.

    Args:
        text: Raw response text.

    Returns:
        The decoded JSON value (any JSON type), or an empty list when *text*
        is empty, not a string, or nothing parseable could be found.
    """
    if not text or not isinstance(text, str):
        return []

    clean = text
    for token in _LLM_CONTROL_TOKENS:
        clean = clean.replace(token, "")
    clean = clean.strip()

    fenced = _FENCED_BLOCK_RE.search(clean)
    payload = (fenced.group(1) if fenced else clean).strip()

    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        pass

    # Recovery: a list is tried before an object, so surrounding prose or a
    # truncated wrapper does not prevent extraction.
    for opener, closer in (("[", "]"), ("{", "}")):
        recovered = _parse_enclosed(payload, opener, closer)
        if recovered is not _NO_MATCH:
            return recovered
    return []


def load_type_registry(custom_path: Optional[str] = None) -> dict:
    """Load the types YAML file that controls type-specific ingestion.

    Args:
        custom_path: Path to the registry file. When falsy, the path is taken
            from ``settings.MINDNET_TYPES_FILE``.

    Returns:
        The parsed registry as a dict. An empty dict is returned when the
        file does not exist, cannot be read or parsed, is empty, or its
        top-level value is not a mapping.
    """
    # Imported lazily inside the function, as in the original code.
    from app.config import get_settings

    settings = get_settings()
    path = custom_path or settings.MINDNET_TYPES_FILE
    if not os.path.exists(path):
        return {}
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception:
        # Deliberate best-effort: any read or parse failure yields an empty
        # registry instead of aborting the caller.
        return {}
    # safe_load() returns None for an empty file and may return a list or a
    # scalar; only a mapping honours the declared return type.
    return data if isinstance(data, dict) else {}


def resolve_note_type(registry: dict, requested: Optional[str]) -> str:
    """Determine the final note type, falling back to ``'concept'``.

    Args:
        registry: Type registry; its ``"types"`` entry lists the known types.
        requested: The requested note type, if any.

    Returns:
        *requested* when it is non-empty and present in the registry's
        ``"types"`` entry, otherwise ``"concept"``.
    """
    # "or {}" also covers a key that is present but empty in the YAML file,
    # which is loaded as None.
    types = registry.get("types") or {}
    if requested and requested in types:
        return requested
    return "concept"


def get_chunk_config_by_profile(registry: dict, profile_name: str, note_type: str) -> Dict[str, Any]:
    """Fetch the chunker parameters for a specific profile from the registry.

    Args:
        registry: Type registry; profiles are read from its
            ``"chunking_profiles"`` entry.
        profile_name: Name of the profile to look up.
        note_type: Note type passed to ``get_chunk_config`` when the profile
            is not available.

    Returns:
        A shallow copy of the profile's settings, with a list-valued
        ``"overlap"`` converted to a tuple. When the profile is missing or is
        not a mapping, the result of ``get_chunk_config(note_type)``.
    """
    # Imported lazily inside the function, as in the original code.
    from app.core.chunking import get_chunk_config

    profiles = registry.get("chunking_profiles") or {}
    profile = profiles.get(profile_name)
    if isinstance(profile, dict):
        # Copy so the conversion below does not mutate the shared registry.
        cfg = profile.copy()
        # YAML sequences load as lists; presumably the chunker expects a
        # tuple here -- TODO confirm against app.core.chunking.
        if isinstance(cfg.get("overlap"), list):
            cfg["overlap"] = tuple(cfg["overlap"])
        return cfg
    return get_chunk_config(note_type)