app/core/note_payload.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s

This commit is contained in:
Lars 2025-11-09 10:11:19 +01:00
parent 4dcd606c10
commit af36c410b4

View File

@@ -1,248 +1,159 @@
# note_payload.py
""" """
note_payload.py v1.4.2 Mindnet - Note Payload Builder
------------------------ Version: 1.4.3
Robuste, abwärtskompatible Payload-Erzeugung für Notes. Beschreibung:
- Robust gegenüber alten/neuen Aufrufsignaturen (toleriert *args, **kwargs).
Ziele - Liest Typ-Defaults aus ./config/config.yaml oder ./config/types.yaml.
- Setzt `retriever_weight`, `chunk_profile`, `edge_defaults` deterministisch. - Setzt in mindnet_notes u.a.:
- Priorität: Frontmatter > Typ-Defaults (config/config.yaml oder config/types.yaml) > ENV > Fallback. - retriever_weight (Frontmatter > Typ-Defaults > ENV > 1.0)
- Akzeptiert ParsedNote-Objekte *oder* Dicts. - chunk_profile (Frontmatter > Typ-Defaults > ENV > "medium")
- Verträgt zusätzliche kwargs (z. B. vault_root/search_root/cfg). - edge_defaults (Frontmatter > Typ-Defaults > [])
- Keine Verwendung nicht-serialisierbarer Typen. - path, type, title, note_id, tags, created/modified/date (falls vorhanden)
- Garantiert JSON-serialisierbare Payloads.
Hinweis
- Diese Datei **lädt Konfig** nur opportunistisch (./config/config.yaml oder ./config/types.yaml relativ zum CWD
bzw. zu `search_root`/`vault_root`, falls übergeben). Wenn dein Aufrufer bereits eine Konfiguration geladen hat,
kann er sie via `types_config` kwarg übergeben (dict wie in deinem Beispiel).
Autor: ChatGPT
Lizenz: MIT
""" """
from __future__ import annotations from __future__ import annotations
from typing import Any, Dict, Optional
import os import os
from pathlib import Path import json
from typing import Any, Dict, Optional, Union, List import pathlib
import yaml
try:
import yaml # type: ignore
except Exception: # pragma: no cover - yaml ist optional, wir degradieren dann sauber
yaml = None # type: ignore
# ------------------------------
# Hilfsfunktionen (keine I/O Magie)
# ------------------------------
def _as_dict(note: Any) -> Dict[str, Any]: def _as_dict(note: Any) -> Dict[str, Any]:
"""Konvertiert eine ParsedNote-ähnliche Struktur robust in ein Dict."""
if isinstance(note, dict): if isinstance(note, dict):
return dict(note) return note
# Objekt -> vorsichtig Attribute lesen d: Dict[str, Any] = {}
out: Dict[str, Any] = {} for attr in (
for attr in ("note_id", "id", "title", "type", "frontmatter", "meta", "body", "text", "content", "path"): "id",
"note_id",
"title",
"path",
"frontmatter",
"meta",
"body",
"text",
"type",
"created",
"modified",
"chunks",
"tags",
):
if hasattr(note, attr): if hasattr(note, attr):
out[attr] = getattr(note, attr) d[attr] = getattr(note, attr)
# Manche Parser haben .data / .raw etc. # manche Parser nutzen "metadata" statt "frontmatter"
if hasattr(note, "__dict__"): if "frontmatter" not in d and hasattr(note, "metadata"):
# nichts überschreiben, nur fehlende ergänzen (nur einfache Typen) d["frontmatter"] = getattr(note, "metadata")
for k, v in note.__dict__.items(): return d
if k not in out:
out[k] = v
return out
def _safe_get(d: Dict[str, Any], key: str, default: Any = None) -> Any: def _load_types_config(explicit: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""Dict-get ohne Mutation, akzeptiert fehlende Dicts.""" if isinstance(explicit, dict):
if not isinstance(d, dict): return explicit
return default for rel in ("config/config.yaml", "config/types.yaml"):
return d.get(key, default) p = pathlib.Path(rel)
if p.exists():
with p.open("r", encoding="utf-8") as f:
data = yaml.safe_load(f) or {}
# zulässig: {"types": {...}} oder direkt {...}
if isinstance(data, dict) and "types" in data and isinstance(data["types"], dict):
return data["types"]
return data if isinstance(data, dict) else {}
return {}
def _load_types_config(search_root: Optional[Union[str, Path]] = None, def _get_front(n: Dict[str, Any]) -> Dict[str, Any]:
preloaded: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: fm = n.get("frontmatter") or n.get("meta") or {}
"""Lädt Typ-Defaults aus config.yaml oder types.yaml (falls vorhanden). return fm if isinstance(fm, dict) else {}
Struktur erwartet wie im Beispiel:
{
"version": "1.0",
"types": {
"concept": {"chunk_profile": "medium", "edge_defaults": [...], "retriever_weight": 0.33},
...
}
}
"""
if isinstance(preloaded, dict) and "types" in preloaded:
return preloaded
candidates: List[Path] = []
if search_root:
root = Path(search_root)
candidates.extend([root / "config.yaml", root / "config" / "config.yaml", root / "config" / "types.yaml"])
# relative zum CWD
cwd = Path.cwd()
candidates.extend([cwd / "config.yaml", cwd / "config" / "config.yaml", cwd / "config" / "types.yaml"])
for p in candidates:
if p.exists() and p.is_file():
if yaml is None:
break
try:
data = yaml.safe_load(p.read_text(encoding="utf-8")) or {}
if isinstance(data, dict) and "types" in data:
return data
except Exception:
# still und hart, kein Crash bei kaputter Datei
pass
return {"version": "1.0", "types": {}}
def _coerce_float(val: Any, default: float) -> float: def _coalesce(*vals):
try: for v in vals:
if val is None: if v is not None:
return default
if isinstance(val, (int, float)):
return float(val)
if isinstance(val, str):
return float(val.strip())
except Exception:
pass
return default
def _ensure_str_list(v: Any) -> List[str]:
if v is None:
return []
if isinstance(v, (list, tuple)):
return [str(x) for x in v if x is not None]
return [str(v)]
def _resolve_type(note_d: Dict[str, Any]) -> str:
fm = note_d.get("frontmatter") or {}
t = _safe_get(fm, "type") or note_d.get("type")
if not t and isinstance(note_d.get("meta"), dict):
t = note_d["meta"].get("type")
return str(t or "concept")
def _resolve_title(note_d: Dict[str, Any]) -> str:
fm = note_d.get("frontmatter") or {}
t = _safe_get(fm, "title") or note_d.get("title")
return str(t or "")
def _resolve_note_id(note_d: Dict[str, Any]) -> Optional[str]:
for k in ("note_id", "id"):
v = note_d.get(k)
if isinstance(v, str) and v:
return v return v
return None return None
def _resolve_body(note_d: Dict[str, Any]) -> str: def _env_float(name: str, default: float) -> float:
for k in ("body", "text", "content"): try:
v = note_d.get(k) return float(os.environ.get(name, default))
if isinstance(v, str) and v.strip(): except Exception:
return v return default
return ""
def _resolve_defaults_for_type(types_cfg: Dict[str, Any], typ: str) -> Dict[str, Any]: def _ensure_list(x) -> list:
if not isinstance(types_cfg, dict): if x is None:
return {} return []
t = (types_cfg.get("types") or {}).get(typ) or {} if isinstance(x, list):
return t if isinstance(t, dict) else {} return [str(i) for i in x]
if isinstance(x, (set, tuple)):
return [str(i) for i in list(x)]
return [str(x)]
def _compute_retriever_weight(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> float:
fm = note_d.get("frontmatter") or {}
# 1) Frontmatter
if "retriever_weight" in fm:
return _coerce_float(fm.get("retriever_weight"), 1.0)
# 2) Typ-Defaults
tdef = _resolve_defaults_for_type(types_cfg, typ)
if "retriever_weight" in tdef:
return _coerce_float(tdef.get("retriever_weight"), 1.0)
# 3) ENV
envv = os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT")
if envv:
return _coerce_float(envv, 1.0)
# 4) Fallback
return 1.0
def _compute_chunk_profile(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> str:
fm = note_d.get("frontmatter") or {}
if "chunk_profile" in fm:
return str(fm.get("chunk_profile"))
tdef = _resolve_defaults_for_type(types_cfg, typ)
if "chunk_profile" in tdef:
return str(tdef.get("chunk_profile"))
envv = os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE")
if envv:
return str(envv)
return "medium"
def _compute_edge_defaults(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> List[str]:
fm = note_d.get("frontmatter") or {}
if "edge_defaults" in fm:
return _ensure_str_list(fm.get("edge_defaults"))
tdef = _resolve_defaults_for_type(types_cfg, typ)
if "edge_defaults" in tdef:
return _ensure_str_list(tdef.get("edge_defaults"))
return []
# ------------------------------
# Öffentliche API
# ------------------------------
def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]: def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]:
"""Erzeugt das Payload-Dict für eine Note.
Akzeptierte zusätzliche kwargs:
- types_config: bereits geladene Config (dict mit "types")
- search_root / vault_root: Ordner, in dem config/* gesucht wird
""" """
note_d = _as_dict(note) Build JSON-serialisable payload for a Note.
Accepts legacy extra args/kwargs (e.g. types_config, vault_root) without error.
"""
n = _as_dict(note)
types_cfg = kwargs.get("types_config") or (args[0] if args else None)
types_cfg = _load_types_config(types_cfg)
# Konfig finden fm = _get_front(n)
types_config = kwargs.get("types_config") note_type = str(fm.get("type") or n.get("type") or "note")
search_root = kwargs.get("search_root") or kwargs.get("vault_root") cfg_for_type = types_cfg.get(note_type, {}) if isinstance(types_cfg, dict) else {}
types_cfg = _load_types_config(search_root, types_config)
# Felder auflösen default_rw = _env_float("MINDNET_DEFAULT_RETRIEVER_WEIGHT", 1.0)
typ = _resolve_type(note_d)
title = _resolve_title(note_d)
note_id = _resolve_note_id(note_d)
body = _resolve_body(note_d)
retriever_weight = _compute_retriever_weight(note_d, types_cfg, typ) retriever_weight = _coalesce(
chunk_profile = _compute_chunk_profile(note_d, types_cfg, typ) fm.get("retriever_weight"),
edge_defaults = _compute_edge_defaults(note_d, types_cfg, typ) cfg_for_type.get("retriever_weight"),
default_rw,
)
try:
retriever_weight = float(retriever_weight)
except Exception:
retriever_weight = default_rw
chunk_profile = _coalesce(
fm.get("chunk_profile"),
cfg_for_type.get("chunk_profile"),
os.environ.get("MINDNET_DEFAULT_CHUNK_PROFILE", "medium"),
)
if not isinstance(chunk_profile, str):
chunk_profile = "medium"
edge_defaults = _ensure_list(
_coalesce(fm.get("edge_defaults"), cfg_for_type.get("edge_defaults"), [])
)
note_id = n.get("note_id") or n.get("id") or fm.get("id")
title = n.get("title") or fm.get("title") or ""
path = n.get("path")
if isinstance(path, pathlib.Path):
path = str(path)
# Payload zusammenstellen (nur JSON-fähige Typen)
payload: Dict[str, Any] = { payload: Dict[str, Any] = {
"type": typ, "note_id": note_id,
"title": title, "title": title,
"retriever_weight": float(retriever_weight), "type": note_type,
"chunk_profile": str(chunk_profile), "path": path,
"retriever_weight": retriever_weight,
"chunk_profile": chunk_profile,
"edge_defaults": edge_defaults, "edge_defaults": edge_defaults,
} }
if note_id:
payload["note_id"] = note_id
if body:
payload["body_preview"] = body[:5000] # nur Vorschau, Retriever nutzt Chunks
# Frontmatter relevante Keys durchreichen (ohne Binärdaten/Objekte) tags = fm.get("tags") or fm.get("keywords") or n.get("tags")
fm = note_d.get("frontmatter") or {} if tags:
if isinstance(fm, dict): payload["tags"] = _ensure_list(tags)
for k, v in fm.items():
if k in ("type", "retriever_weight", "chunk_profile", "edge_defaults"):
continue
# nur einfache/nützliche Typen durchlassen
if isinstance(v, (str, int, float, bool, list, dict)) or v is None:
payload[f"fm_{k}"] = v
for k in ("created", "modified", "date"):
v = fm.get(k) or n.get(k)
if v:
payload[k] = str(v)
# Validierungs-RTT (stellt JSON-Serialisierbarkeit sicher)
json.loads(json.dumps(payload, ensure_ascii=False))
return payload return payload