# mindnet/app/core/registry.py

"""
FILE: app/core/registry.py
DESCRIPTION: Zentraler Base-Layer für Konfigurations-Loading und Text-Bereinigung.
             Bricht Zirkelbezüge zwischen Ingestion und LLMService auf.
VERSION: 1.0.0
"""
import logging
import os
from typing import Optional, List

import yaml

def load_type_registry(custom_path: Optional[str] = None) -> dict:
    """Load the types.yaml registry that drives type-specific logic.

    Args:
        custom_path: Explicit path to a registry file. When omitted (or
            empty), the path is read from ``settings.MINDNET_TYPES_FILE``.

    Returns:
        The parsed YAML mapping. An empty dict is returned when the file is
        missing, cannot be read or parsed, or when its top-level document is
        not a mapping. This function never raises for a bad registry file.
    """
    if custom_path:
        path = custom_path
    else:
        # Settings are imported inside the function (not at module level) to
        # avoid import cycles, and only when no explicit path was supplied.
        from app.config import get_settings
        path = get_settings().MINDNET_TYPES_FILE

    try:
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        # A missing registry file is an expected condition: fall back quietly.
        return {}
    except Exception as exc:
        # Best-effort loader: keep returning an empty registry on any read or
        # parse failure, but leave a trace instead of swallowing it silently.
        logging.getLogger(__name__).warning(
            "Could not load type registry from %s: %s", path, exc
        )
        return {}

    # An empty file yields None and a malformed one may yield a list/scalar;
    # callers rely on dict methods, so only a mapping is passed through.
    return data if isinstance(data, dict) else {}

def clean_llm_text(text: str, registry: Optional[dict] = None) -> str:
    """Strip LLM control tokens (``<s>``, ``[OUT]`` etc.) from a text.

    Args:
        text: Raw model output. Falsy or non-string input yields ``""``.
        registry: Already-loaded type registry. When ``None``, the registry
            is loaded via ``load_type_registry()``. An explicitly passed
            (even empty) dict is used as-is and never triggers a reload.

    Returns:
        ``text`` with every configured cleanup pattern removed and
        surrounding whitespace stripped.

    The patterns come from ``registry["llm_settings"]["cleanup_patterns"]``.
    If that entry is missing, null, or not a list/string, the built-in
    defaults are used. An empty list means "remove nothing".
    """
    if not text or not isinstance(text, str):
        return ""

    default_patterns = ["<s>", "</s>", "[OUT]", "[/OUT]"]
    reg = registry if registry is not None else load_type_registry()

    # Load patterns from llm_settings (WP-14). YAML may contain explicit
    # nulls or wrong types at either level, so each step is type-checked
    # instead of chaining .get() calls that would raise on None.
    llm_settings = reg.get("llm_settings") if isinstance(reg, dict) else None
    patterns = (
        llm_settings.get("cleanup_patterns", default_patterns)
        if isinstance(llm_settings, dict)
        else default_patterns
    )
    if isinstance(patterns, str):
        # A single scalar pattern must not be iterated character by character.
        patterns = [patterns]
    elif not isinstance(patterns, (list, tuple)):
        patterns = default_patterns

    clean = text
    for p in patterns:
        # Skip non-string entries; str.replace would raise TypeError on them.
        if isinstance(p, str) and p:
            clean = clean.replace(p, "")

    return clean.strip()