"""
type_registry.py v1.0.0

Purpose:
  - Load an optional, configurable type registry (YAML/JSON) that provides,
    per note "type" (taken from the frontmatter), a chunk profile, default
    edges and an optional retriever weight.

Compatibility:
  - No dependencies on other project modules.
  - No hard errors when the registry is missing, unreadable or incomplete:
    every accessor falls back to a neutral default instead of raising.

Usage:
    from app.core.type_registry import (
        load_type_registry, resolve_chunk_profile, get_edge_defaults_for_type,
        get_retriever_weight_for_type
    )

Environment variables:
    TYPE_REGISTRY_PATH (default: "config/types.yaml")

Format (example):
    version: 1.0
    types:
      concept:
        chunk_profile: long
        edge_defaults: [references, related_to]
        retriever_weight: 1.0
      task:
        chunk_profile: short
        edge_defaults: [depends_on, belongs_to]
        retriever_weight: 0.8
      experience:
        chunk_profile: medium
        edge_defaults: [derived_from, inspired_by]
        retriever_weight: 0.9
"""
from __future__ import annotations

import json
import logging
import os
from typing import Any, Dict, List, Optional

try:
    import yaml  # type: ignore
except Exception:  # yaml is optional; JSON is supported as well
    yaml = None  # type: ignore

_LOGGER = logging.getLogger(__name__)

# Registry path used when neither an argument nor the environment names one.
_DEFAULT_REGISTRY_PATH = "config/types.yaml"

# Parsed registries keyed by the path they were loaded from. Failed loads are
# cached as empty dicts too, so a broken file is reported only once.
_CACHE: Dict[str, Dict[str, Any]] = {}


def _safe_load_yaml_or_json(path: str, silent: bool = False) -> Dict[str, Any]:
    """Read *path* and parse it as YAML or JSON without ever raising.

    YAML is used when PyYAML is importable and the file name ends in
    ``.yaml``/``.yml``; every other case is parsed as JSON.

    Args:
        path: Location of the registry file.
        silent: When true, suppress the warning that is otherwise logged if
            the file exists but cannot be read or parsed.

    Returns:
        The parsed top-level mapping, or an empty dict when the file is
        missing, unreadable, malformed, empty, or not a mapping.
    """
    try:
        with open(path, "r", encoding="utf-8") as handle:
            text = handle.read()
    except FileNotFoundError:
        # A missing registry is the normal "not configured" case.
        return {}
    except (OSError, ValueError) as exc:
        # ValueError covers decoding errors and paths with embedded NULs.
        if not silent:
            _LOGGER.warning("type registry %r could not be read: %s", path, exc)
        return {}

    parsed: Any
    if yaml is not None and path.lower().endswith((".yaml", ".yml")):
        try:
            parsed = yaml.safe_load(text)
        except Exception as exc:  # best-effort boundary: never propagate
            if not silent:
                _LOGGER.warning("type registry %r is not valid YAML: %s", path, exc)
            return {}
    else:
        try:
            parsed = json.loads(text)
        except (ValueError, RecursionError) as exc:
            if not silent:
                _LOGGER.warning("type registry %r is not valid JSON: %s", path, exc)
            return {}

    if parsed is None:
        # Empty YAML document or a literal JSON null.
        return {}
    if not isinstance(parsed, dict):
        if not silent:
            _LOGGER.warning(
                "type registry %r must contain a mapping at top level, got %s",
                path,
                type(parsed).__name__,
            )
        return {}
    return parsed


def load_type_registry(path: Optional[str] = None, silent: bool = False) -> Dict[str, Any]:
    """Load the registry once per path and cache the result.

    Args:
        path: Registry file to load. When omitted or empty, the
            ``TYPE_REGISTRY_PATH`` environment variable is used, and
            ``"config/types.yaml"`` if that is unset or empty as well.
        silent: When true, do not log a warning if the file exists but cannot
            be read or parsed.

    Returns:
        The registry mapping; an empty dict when it is missing or invalid.
        The same dict object is returned on every call for a given path.
    """
    key = path or os.getenv("TYPE_REGISTRY_PATH") or _DEFAULT_REGISTRY_PATH
    try:
        return _CACHE[key]
    except KeyError:
        pass
    registry = _safe_load_yaml_or_json(key, silent=silent)
    _CACHE[key] = registry
    return registry


def _types_map(reg: Dict[str, Any]) -> Dict[str, Any]:
    """Return the ``types`` section of *reg*, or ``{}`` if it is not a mapping."""
    if not isinstance(reg, dict):
        return {}
    section = reg.get("types")
    return section if isinstance(section, dict) else {}


def _type_entry(note_type: str) -> Dict[str, Any]:
    """Return the registry entry for *note_type*, or ``{}`` if absent/invalid."""
    entry = _types_map(load_type_registry()).get(note_type)
    # An entry such as ``task: null`` or ``task: foo`` is treated as missing.
    return entry if isinstance(entry, dict) else {}


def resolve_chunk_profile(note_type: str, default_profile: str = "default") -> str:
    """Return the chunk profile configured for *note_type*.

    Args:
        note_type: The note's ``type`` value.
        default_profile: Value to use when the type is unknown or has no
            (or a null) ``chunk_profile``.

    Returns:
        The configured profile, or *default_profile*, as a string.
    """
    profile = _type_entry(note_type).get("chunk_profile")
    return str(default_profile if profile is None else profile)


def get_edge_defaults_for_type(note_type: str) -> List[str]:
    """Return the default edge kinds configured for *note_type*.

    Returns:
        The ``edge_defaults`` list with every item converted to ``str`` and
        null items dropped; an empty list when the type is unknown or the
        value is not a list.
    """
    edges = _type_entry(note_type).get("edge_defaults")
    if not isinstance(edges, list):
        return []
    return [str(edge) for edge in edges if edge is not None]


def get_retriever_weight_for_type(note_type: str) -> Optional[float]:
    """Return the retriever weight configured for *note_type*.

    Returns:
        The ``retriever_weight`` as a float, or ``None`` when the type is
        unknown, the value is absent, or it cannot be converted to a float.
    """
    raw = _type_entry(note_type).get("retriever_weight")
    if raw is None:
        return None
    try:
        return float(raw)
    except (TypeError, ValueError, OverflowError):
        return None