From af36c410b4125f35faa10939c354d1805b3efb72 Mon Sep 17 00:00:00 2001 From: Lars Date: Sun, 9 Nov 2025 10:11:19 +0100 Subject: [PATCH] app/core/note_payload.py aktualisiert --- app/core/note_payload.py | 335 ++++++++++++++------------------------- 1 file changed, 123 insertions(+), 212 deletions(-) diff --git a/app/core/note_payload.py b/app/core/note_payload.py index 81f8f95..627cc39 100644 --- a/app/core/note_payload.py +++ b/app/core/note_payload.py @@ -1,248 +1,159 @@ +# note_payload.py """ -note_payload.py — v1.4.2 ------------------------- -Robuste, abwärtskompatible Payload-Erzeugung für Notes. - -Ziele -- Setzt `retriever_weight`, `chunk_profile`, `edge_defaults` deterministisch. -- Priorität: Frontmatter > Typ-Defaults (config/config.yaml oder config/types.yaml) > ENV > Fallback. -- Akzeptiert ParsedNote-Objekte *oder* Dicts. -- Verträgt zusätzliche kwargs (z. B. vault_root/search_root/cfg). -- Keine Verwendung nicht-serialisierbarer Typen. - -Hinweis -- Diese Datei **lädt Konfig** nur opportunistisch (./config/config.yaml oder ./config/types.yaml relativ zum CWD - bzw. zu `search_root`/`vault_root`, falls übergeben). Wenn dein Aufrufer bereits eine Konfiguration geladen hat, - kann er sie via `types_config` kwarg übergeben (dict wie in deinem Beispiel). - -Autor: ChatGPT -Lizenz: MIT +Mindnet - Note Payload Builder +Version: 1.4.3 +Beschreibung: +- Robust gegenüber alten/neuen Aufrufsignaturen (toleriert *args, **kwargs). +- Liest Typ-Defaults aus ./config/config.yaml oder ./config/types.yaml. +- Setzt in mindnet_notes u.a.: + - retriever_weight (Frontmatter > Typ-Defaults > ENV > 1.0) + - chunk_profile (Frontmatter > Typ-Defaults > ENV > "medium") + - edge_defaults (Frontmatter > Typ-Defaults > []) + - path, type, title, note_id, tags, created/modified/date (falls vorhanden) +- Garantiert JSON-serialisierbare Payloads. """ + from __future__ import annotations - +from typing import Any, Dict, Optional import os -from pathlib import Path -from typing import Any, Dict, Optional, Union, List +import json +import pathlib +import yaml -try: - import yaml # type: ignore -except Exception: # pragma: no cover - yaml ist optional, wir degradieren dann sauber - yaml = None # type: ignore - - -# ------------------------------ -# Hilfsfunktionen (keine I/O Magie) -# ------------------------------ def _as_dict(note: Any) -> Dict[str, Any]: - """Konvertiert eine ParsedNote-ähnliche Struktur robust in ein Dict.""" if isinstance(note, dict): - return dict(note) - # Objekt -> vorsichtig Attribute lesen - out: Dict[str, Any] = {} - for attr in ("note_id", "id", "title", "type", "frontmatter", "meta", "body", "text", "content", "path"): + return note + d: Dict[str, Any] = {} + for attr in ( + "id", + "note_id", + "title", + "path", + "frontmatter", + "meta", + "body", + "text", + "type", + "created", + "modified", + "chunks", + "tags", + ): if hasattr(note, attr): - out[attr] = getattr(note, attr) - # Manche Parser haben .data / .raw etc. - if hasattr(note, "__dict__"): - # nichts überschreiben, nur fehlende ergänzen (nur einfache Typen) - for k, v in note.__dict__.items(): - if k not in out: - out[k] = v - return out + d[attr] = getattr(note, attr) + # manche Parser nutzen "metadata" statt "frontmatter" + if "frontmatter" not in d and hasattr(note, "metadata"): + d["frontmatter"] = getattr(note, "metadata") + return d -def _safe_get(d: Dict[str, Any], key: str, default: Any = None) -> Any: - """Dict-get ohne Mutation, akzeptiert fehlende Dicts.""" - if not isinstance(d, dict): - return default - return d.get(key, default) +def _load_types_config(explicit: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + if isinstance(explicit, dict): + return explicit + for rel in ("config/config.yaml", "config/types.yaml"): + p = pathlib.Path(rel) + if p.exists(): + with p.open("r", encoding="utf-8") as f: + data = yaml.safe_load(f) or {} + # zulässig: {"types": {...}} oder direkt {...} + if isinstance(data, dict) and "types" in data and isinstance(data["types"], dict): + return data["types"] + return data if isinstance(data, dict) else {} + return {} -def _load_types_config(search_root: Optional[Union[str, Path]] = None, - preloaded: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: - """Lädt Typ-Defaults aus config.yaml oder types.yaml (falls vorhanden). - Struktur erwartet wie im Beispiel: - { - "version": "1.0", - "types": { - "concept": {"chunk_profile": "medium", "edge_defaults": [...], "retriever_weight": 0.33}, - ... - } - } - """ - if isinstance(preloaded, dict) and "types" in preloaded: - return preloaded - - candidates: List[Path] = [] - if search_root: - root = Path(search_root) - candidates.extend([root / "config.yaml", root / "config" / "config.yaml", root / "config" / "types.yaml"]) - # relative zum CWD - cwd = Path.cwd() - candidates.extend([cwd / "config.yaml", cwd / "config" / "config.yaml", cwd / "config" / "types.yaml"]) - - for p in candidates: - if p.exists() and p.is_file(): - if yaml is None: - break - try: - data = yaml.safe_load(p.read_text(encoding="utf-8")) or {} - if isinstance(data, dict) and "types" in data: - return data - except Exception: - # still und hart, kein Crash bei kaputter Datei - pass - return {"version": "1.0", "types": {}} +def _get_front(n: Dict[str, Any]) -> Dict[str, Any]: + fm = n.get("frontmatter") or n.get("meta") or {} + return fm if isinstance(fm, dict) else {} -def _coerce_float(val: Any, default: float) -> float: - try: - if val is None: - return default - if isinstance(val, (int, float)): - return float(val) - if isinstance(val, str): - return float(val.strip()) - except Exception: - pass - return default - - -def _ensure_str_list(v: Any) -> List[str]: - if v is None: - return [] - if isinstance(v, (list, tuple)): - return [str(x) for x in v if x is not None] - return [str(v)] - - -def _resolve_type(note_d: Dict[str, Any]) -> str: - fm = note_d.get("frontmatter") or {} - t = _safe_get(fm, "type") or note_d.get("type") - if not t and isinstance(note_d.get("meta"), dict): - t = note_d["meta"].get("type") - return str(t or "concept") - - -def _resolve_title(note_d: Dict[str, Any]) -> str: - fm = note_d.get("frontmatter") or {} - t = _safe_get(fm, "title") or note_d.get("title") - return str(t or "") - - -def _resolve_note_id(note_d: Dict[str, Any]) -> Optional[str]: - for k in ("note_id", "id"): - v = note_d.get(k) - if isinstance(v, str) and v: +def _coalesce(*vals): + for v in vals: + if v is not None: return v return None -def _resolve_body(note_d: Dict[str, Any]) -> str: - for k in ("body", "text", "content"): - v = note_d.get(k) - if isinstance(v, str) and v.strip(): - return v - return "" +def _env_float(name: str, default: float) -> float: + try: + return float(os.environ.get(name, default)) + except Exception: + return default -def _resolve_defaults_for_type(types_cfg: Dict[str, Any], typ: str) -> Dict[str, Any]: - if not isinstance(types_cfg, dict): - return {} - t = (types_cfg.get("types") or {}).get(typ) or {} - return t if isinstance(t, dict) else {} +def _ensure_list(x) -> list: + if x is None: + return [] + if isinstance(x, list): + return [str(i) for i in x] + if isinstance(x, (set, tuple)): + return [str(i) for i in list(x)] + return [str(x)] -def _compute_retriever_weight(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> float: - fm = note_d.get("frontmatter") or {} - # 1) Frontmatter - if "retriever_weight" in fm: - return _coerce_float(fm.get("retriever_weight"), 1.0) - # 2) Typ-Defaults - tdef = _resolve_defaults_for_type(types_cfg, typ) - if "retriever_weight" in tdef: - return _coerce_float(tdef.get("retriever_weight"), 1.0) - # 3) ENV - envv = os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT") - if envv: - return _coerce_float(envv, 1.0) - # 4) Fallback - return 1.0 - - -def _compute_chunk_profile(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> str: - fm = note_d.get("frontmatter") or {} - if "chunk_profile" in fm: - return str(fm.get("chunk_profile")) - tdef = _resolve_defaults_for_type(types_cfg, typ) - if "chunk_profile" in tdef: - return str(tdef.get("chunk_profile")) - envv = os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE") - if envv: - return str(envv) - return "medium" - - -def _compute_edge_defaults(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> List[str]: - fm = note_d.get("frontmatter") or {} - if "edge_defaults" in fm: - return _ensure_str_list(fm.get("edge_defaults")) - tdef = _resolve_defaults_for_type(types_cfg, typ) - if "edge_defaults" in tdef: - return _ensure_str_list(tdef.get("edge_defaults")) - return [] - - -# ------------------------------ -# Öffentliche API -# ------------------------------ - def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]: - """Erzeugt das Payload-Dict für eine Note. - - Akzeptierte zusätzliche kwargs: - - types_config: bereits geladene Config (dict mit "types") - - search_root / vault_root: Ordner, in dem config/* gesucht wird """ - note_d = _as_dict(note) + Build JSON-serialisable payload for a Note. + Accepts legacy extra args/kwargs (e.g. types_config, vault_root) without error. + """ + n = _as_dict(note) + types_cfg = kwargs.get("types_config") or (args[0] if args else None) + types_cfg = _load_types_config(types_cfg) - # Konfig finden - types_config = kwargs.get("types_config") - search_root = kwargs.get("search_root") or kwargs.get("vault_root") - types_cfg = _load_types_config(search_root, types_config) + fm = _get_front(n) + note_type = str(fm.get("type") or n.get("type") or "note") + cfg_for_type = types_cfg.get(note_type, {}) if isinstance(types_cfg, dict) else {} - # Felder auflösen - typ = _resolve_type(note_d) - title = _resolve_title(note_d) - note_id = _resolve_note_id(note_d) - body = _resolve_body(note_d) + default_rw = _env_float("MINDNET_DEFAULT_RETRIEVER_WEIGHT", 1.0) - retriever_weight = _compute_retriever_weight(note_d, types_cfg, typ) - chunk_profile = _compute_chunk_profile(note_d, types_cfg, typ) - edge_defaults = _compute_edge_defaults(note_d, types_cfg, typ) + retriever_weight = _coalesce( + fm.get("retriever_weight"), + cfg_for_type.get("retriever_weight"), + default_rw, + ) + try: + retriever_weight = float(retriever_weight) + except Exception: + retriever_weight = default_rw + + chunk_profile = _coalesce( + fm.get("chunk_profile"), + cfg_for_type.get("chunk_profile"), + os.environ.get("MINDNET_DEFAULT_CHUNK_PROFILE", "medium"), + ) + if not isinstance(chunk_profile, str): + chunk_profile = "medium" + + edge_defaults = _ensure_list( + _coalesce(fm.get("edge_defaults"), cfg_for_type.get("edge_defaults"), []) + ) + + note_id = n.get("note_id") or n.get("id") or fm.get("id") + title = n.get("title") or fm.get("title") or "" + path = n.get("path") + if isinstance(path, pathlib.Path): + path = str(path) - # Payload zusammenstellen (nur JSON-fähige Typen) payload: Dict[str, Any] = { - "type": typ, + "note_id": note_id, "title": title, - "retriever_weight": float(retriever_weight), - "chunk_profile": str(chunk_profile), + "type": note_type, + "path": path, + "retriever_weight": retriever_weight, + "chunk_profile": chunk_profile, "edge_defaults": edge_defaults, } - if note_id: - payload["note_id"] = note_id - if body: - payload["body_preview"] = body[:5000] # nur Vorschau, Retriever nutzt Chunks - # Frontmatter relevante Keys durchreichen (ohne Binärdaten/Objekte) - fm = note_d.get("frontmatter") or {} - if isinstance(fm, dict): - for k, v in fm.items(): - if k in ("type", "retriever_weight", "chunk_profile", "edge_defaults"): - continue - # nur einfache/nützliche Typen durchlassen - if isinstance(v, (str, int, float, bool, list, dict)) or v is None: - payload[f"fm_{k}"] = v + tags = fm.get("tags") or fm.get("keywords") or n.get("tags") + if tags: + payload["tags"] = _ensure_list(tags) + for k in ("created", "modified", "date"): + v = fm.get(k) or n.get(k) + if v: + payload[k] = str(v) + + # Validierungs-RTT (stellt JSON-Serialisierbarkeit sicher) + json.loads(json.dumps(payload, ensure_ascii=False)) return payload