app/core/type_registry.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
2f9ce824a0
commit
afc90fe992
|
|
@ -1,79 +1,111 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
app/core/type_registry.py — mindnet · WP-03 (Version 1.0.0)
|
||||
type_registry.py v1.0.0
|
||||
|
||||
Zweck:
|
||||
- Lädt eine optionale Typen-Registry aus config/types.yaml|json.
|
||||
- Liefert pro `type` eine Konfiguration (z. B. Chunk-Profile, Standard-Edges,
|
||||
Retriever-Gewichtungen). Wird in import/chunk/edges integriert.
|
||||
- Optionale, konfigurierbare Type-Registry laden (YAML/JSON), um pro "type"
|
||||
(aus Frontmatter) Chunk-Profile, Default-Edges und optionale
|
||||
Retriever-Gewichte bereitzustellen – ohne bestehende Funktionen zu brechen.
|
||||
|
||||
Verhalten:
|
||||
- Fehlt die Datei oder der Typ → es werden Defaults genutzt.
|
||||
- Registry wird gecacht (lazy-load).
|
||||
Kompatibilität:
|
||||
- Keine Abhängigkeiten von anderen Modulen.
|
||||
- Keine harten Fehler, wenn Registry fehlt oder unvollständig ist.
|
||||
|
||||
Nutzung:
|
||||
from app.core.type_registry import (
|
||||
load_type_registry, resolve_chunk_profile, get_edge_defaults_for_type,
|
||||
get_retriever_weight_for_type
|
||||
)
|
||||
|
||||
Umgebungsvariablen:
|
||||
TYPE_REGISTRY_PATH (default: "config/types.yaml")
|
||||
|
||||
Format (Beispiel):
|
||||
version: 1.0
|
||||
types:
|
||||
concept:
|
||||
chunk_profile: long
|
||||
edge_defaults: [references, related_to]
|
||||
retriever_weight: 1.0
|
||||
task:
|
||||
chunk_profile: short
|
||||
edge_defaults: [depends_on, belongs_to]
|
||||
retriever_weight: 0.8
|
||||
experience:
|
||||
chunk_profile: medium
|
||||
edge_defaults: [derived_from, inspired_by]
|
||||
retriever_weight: 0.9
|
||||
"""
|
||||
|
||||
__version__ = "1.0.0"
|
||||
|
||||
from __future__ import annotations
|
||||
import os
|
||||
import json
|
||||
from typing import Any, Dict
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
try:
|
||||
import yaml # type: ignore
|
||||
except Exception:
|
||||
yaml = None # optional
|
||||
except Exception: # yaml ist optional; JSON wird ebenfalls unterstützt
|
||||
yaml = None # type: ignore
|
||||
|
||||
_CACHE: Dict[str, Any] = {}
|
||||
_CACHE: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
DEFAULT_REG = {
|
||||
"version": "1.0",
|
||||
"types": {
|
||||
"concept": {
|
||||
"chunk_profile": "medium",
|
||||
"edge_defaults": ["references", "related_to"],
|
||||
"retriever_weight": 1.0
|
||||
},
|
||||
"task": {
|
||||
"chunk_profile": "short",
|
||||
"edge_defaults": ["depends_on", "belongs_to"],
|
||||
"retriever_weight": 0.8
|
||||
},
|
||||
"experience": {
|
||||
"chunk_profile": "medium",
|
||||
"edge_defaults": ["derived_from"],
|
||||
"retriever_weight": 0.9
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def load_type_registry(path: str = "config/types.yaml", silent: bool = False) -> Dict[str, Any]:
|
||||
global _CACHE
|
||||
if _CACHE:
|
||||
return _CACHE
|
||||
|
||||
def load_yaml(p: str) -> Dict[str, Any]:
|
||||
if yaml is None:
|
||||
return {}
|
||||
with open(p, "r", encoding="utf-8") as f:
|
||||
return yaml.safe_load(f) or {}
|
||||
|
||||
def load_json(p: str) -> Dict[str, Any]:
|
||||
with open(p, "r", encoding="utf-8") as f:
|
||||
return json.load(f) or {}
|
||||
|
||||
reg: Dict[str, Any] = {}
|
||||
if os.path.exists(path):
|
||||
def _safe_load_yaml_or_json(path: str) -> Dict[str, Any]:
|
||||
if not os.path.exists(path):
|
||||
return {}
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
data = f.read()
|
||||
except Exception:
|
||||
return {}
|
||||
# YAML bevorzugen, wenn verfügbar und Datei nach YAML aussieht
|
||||
if path.lower().endswith((".yaml", ".yml")) and yaml is not None:
|
||||
try:
|
||||
if path.endswith(".yaml") or path.endswith(".yml"):
|
||||
reg = load_yaml(path)
|
||||
elif path.endswith(".json"):
|
||||
reg = load_json(path)
|
||||
except Exception as e:
|
||||
if not silent:
|
||||
print(f"[type_registry] WARN: failed to load {path}: {e}")
|
||||
return yaml.safe_load(data) or {}
|
||||
except Exception:
|
||||
return {}
|
||||
# JSON fallback
|
||||
try:
|
||||
return json.loads(data)
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
if not reg:
|
||||
# Fallback
|
||||
reg = DEFAULT_REG
|
||||
def load_type_registry(path: Optional[str] = None) -> Dict[str, Any]:
    """
    Load the type registry once per path and cache the result.

    The registry location is `path` when it is given and non-empty, otherwise
    the TYPE_REGISTRY_PATH environment variable, otherwise
    "config/types.yaml". The resolved location is also the cache key, so each
    distinct location is read at most once per process.

    Returns:
        The loaded registry, or an empty dict when the loader returned
        anything other than a dict.
    """
    key = path if path else os.getenv("TYPE_REGISTRY_PATH", "config/types.yaml")
    try:
        return _CACHE[key]
    except KeyError:
        pass  # not loaded yet for this location

    loaded = _safe_load_yaml_or_json(key)
    registry = loaded if isinstance(loaded, dict) else {}
    # Empty results are cached too, so a missing file is not re-probed.
    _CACHE[key] = registry
    return registry
|
||||
|
||||
_CACHE = reg
|
||||
return _CACHE
|
||||
def _types_map(reg: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return reg.get("types", {}) if isinstance(reg, dict) else {}
|
||||
|
||||
def resolve_chunk_profile(note_type: str, default_profile: str = "default") -> str:
    """
    Return the chunk profile configured for `note_type`.

    Args:
        note_type: The note type to look up in the registry's "types" map.
        default_profile: Value used when the registry has no usable
            "chunk_profile" for this type.

    Returns:
        The configured profile as a string, or `default_profile` (as a
        string) when the type is unknown, its entry is not a mapping, or its
        "chunk_profile" is missing or null.
    """
    tmap = _types_map(load_type_registry())
    entry = tmap.get(note_type) if isinstance(tmap, dict) else None
    # An entry such as `concept:` with no body parses to None; treat any
    # non-mapping entry as "not configured" instead of raising.
    if not isinstance(entry, dict):
        return str(default_profile)
    profile = entry.get("chunk_profile")
    # An explicit null must not leak out as the literal string "None".
    return str(default_profile if profile is None else profile)
|
||||
|
||||
def get_edge_defaults_for_type(note_type: str) -> List[str]:
    """
    Return the default edge names configured for `note_type`.

    Args:
        note_type: The note type to look up in the registry's "types" map.

    Returns:
        The entries of "edge_defaults" converted to strings. An empty list
        is returned when the type is unknown, its entry is not a mapping, or
        "edge_defaults" is missing or not a list.
    """
    tmap = _types_map(load_type_registry())
    entry = tmap.get(note_type) if isinstance(tmap, dict) else None
    # A type declared without a body parses to None; treat any non-mapping
    # entry as "no defaults" instead of raising.
    if not isinstance(entry, dict):
        return []
    edges = entry.get("edge_defaults", [])
    if not isinstance(edges, list):
        return []
    return [str(edge) for edge in edges]
|
||||
|
||||
def get_retriever_weight_for_type(note_type: str) -> Optional[float]:
    """
    Return the retriever weight configured for `note_type`, if any.

    Args:
        note_type: The note type to look up in the registry's "types" map.

    Returns:
        "retriever_weight" converted to float, or None when the type is
        unknown, its entry is not a mapping, the weight is missing or null,
        or the value cannot be converted to a float.
    """
    tmap = _types_map(load_type_registry())
    entry = tmap.get(note_type) if isinstance(tmap, dict) else None
    # A type declared without a body parses to None; treat any non-mapping
    # entry as "no weight configured" instead of raising.
    if not isinstance(entry, dict):
        return None
    weight = entry.get("retriever_weight")
    if weight is None:
        return None
    try:
        return float(weight)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric or out-of-range config value: behave as if unset.
        return None
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user