"""
note_payload.py — v1.4.2
------------------------
Robust, backward-compatible payload construction for notes.

Goals
- Sets `retriever_weight`, `chunk_profile` and `edge_defaults` deterministically.
- Priority: frontmatter > type defaults (config.yaml or types.yaml) > ENV > fallback.
  (`edge_defaults` has no ENV level; its fallback is an empty list.)
- A value of ``None`` (YAML ``null``) at any level counts as "not set" and
  falls through to the next level.
- Accepts ParsedNote-like objects *or* dicts.
- Tolerates additional kwargs (e.g. vault_root/search_root/cfg).
- Emits only JSON-serializable values.

Note
- This module loads configuration only opportunistically. It probes
  ``config.yaml``, ``config/config.yaml`` and ``config/types.yaml`` below
  `search_root`/`vault_root` (if given) and then below the current working
  directory. A caller that has already loaded a configuration can pass it via
  the `types_config` kwarg (a dict containing a "types" key).

Author: ChatGPT
License: MIT
"""

from __future__ import annotations

import math
import os
from pathlib import Path
from typing import Any, Dict, Optional, Union, List

try:
    import yaml  # type: ignore
except Exception:  # pragma: no cover - yaml is optional; degrade gracefully without it
    yaml = None  # type: ignore


# ------------------------------
# Helper functions
# ------------------------------

# Attributes copied first from ParsedNote-like objects.
_NOTE_ATTRS = (
    "note_id", "id", "title", "type", "frontmatter",
    "meta", "body", "text", "content", "path",
)

# Frontmatter keys that are resolved explicitly and therefore not passed
# through with the ``fm_`` prefix.
_RESOLVED_FM_KEYS = ("type", "retriever_weight", "chunk_profile", "edge_defaults")

# Nesting limit for the JSON-safety check; also protects against
# self-referencing containers.
_MAX_JSON_DEPTH = 32


def _as_dict(note: Any) -> Dict[str, Any]:
    """Convert a ParsedNote-like structure into a plain dict.

    Dicts are shallow-copied. For any other object the well-known note
    attributes are read first; afterwards every entry of the instance
    ``__dict__`` that is still missing is added (existing keys are never
    overwritten).
    """
    if isinstance(note, dict):
        return dict(note)

    out: Dict[str, Any] = {}
    for attr in _NOTE_ATTRS:
        if hasattr(note, attr):
            out[attr] = getattr(note, attr)

    # Some parsers expose extra fields (.data, .raw, ...): add only missing keys.
    if hasattr(note, "__dict__"):
        for k, v in note.__dict__.items():
            if k not in out:
                out[k] = v
    return out


def _safe_get(d: Dict[str, Any], key: str, default: Any = None) -> Any:
    """Return ``d.get(key, default)``, or *default* when *d* is not a dict."""
    if not isinstance(d, dict):
        return default
    return d.get(key, default)


def _frontmatter(note_d: Dict[str, Any]) -> Dict[str, Any]:
    """Return the note's frontmatter, or an empty dict if it is missing or not a dict."""
    fm = note_d.get("frontmatter")
    return fm if isinstance(fm, dict) else {}


def _load_types_config(search_root: Optional[Union[str, Path]] = None,
                       preloaded: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Load type defaults from config.yaml or types.yaml, if available.

    Expected structure::

        {
          "version": "1.0",
          "types": {
            "concept": {"chunk_profile": "medium", "edge_defaults": [...], "retriever_weight": 0.33},
            ...
          }
        }

    Args:
        search_root: Optional directory that is probed before the current
            working directory.
        preloaded: Already loaded configuration; returned unchanged when it is
            a dict containing a "types" key.

    Returns:
        The first candidate file that parses to a dict with a "types" key, or
        ``{"version": "1.0", "types": {}}`` when nothing usable is found or
        PyYAML is not installed.
    """
    if isinstance(preloaded, dict) and "types" in preloaded:
        return preloaded

    if yaml is None:
        # Without a YAML parser no file can be read; skip the filesystem probing.
        return {"version": "1.0", "types": {}}

    candidates: List[Path] = []
    if search_root:
        root = Path(search_root)
        candidates.extend([
            root / "config.yaml",
            root / "config" / "config.yaml",
            root / "config" / "types.yaml",
        ])
    # Relative to the current working directory.
    cwd = Path.cwd()
    candidates.extend([
        cwd / "config.yaml",
        cwd / "config" / "config.yaml",
        cwd / "config" / "types.yaml",
    ])

    for p in candidates:
        try:
            if not p.is_file():
                continue
            data = yaml.safe_load(p.read_text(encoding="utf-8")) or {}
        except Exception:
            # Deliberately best-effort: an unreadable or broken file must not
            # crash payload creation; try the next candidate instead.
            continue
        if isinstance(data, dict) and "types" in data:
            return data

    return {"version": "1.0", "types": {}}


def _coerce_float(val: Any, default: float) -> float:
    """Convert *val* to a finite float, returning *default* when that fails.

    Numbers and numeric strings are accepted. ``None``, unparseable input,
    other types and non-finite results (NaN, +/-inf) yield *default*, because
    non-finite numbers are not valid JSON.
    """
    if val is None:
        return default
    try:
        if isinstance(val, (int, float)):
            result = float(val)
        elif isinstance(val, str):
            result = float(val.strip())
        else:
            return default
    except (TypeError, ValueError, OverflowError):
        return default
    return result if math.isfinite(result) else default


def _ensure_str_list(v: Any) -> List[str]:
    """Normalize *v* to a list of strings.

    ``None`` becomes ``[]``; lists/tuples are stringified element-wise with
    ``None`` elements dropped; any other value becomes a one-element list.
    """
    if v is None:
        return []
    if isinstance(v, (list, tuple)):
        return [str(x) for x in v if x is not None]
    return [str(v)]


def _is_json_safe(value: Any, depth: int = 0) -> bool:
    """Return True if *value* consists only of JSON-serializable builtins.

    Scalars (str, int, float, bool, None) are accepted; lists, tuples and
    dicts are checked recursively. Dict keys must be scalars as well.
    Containers nested deeper than ``_MAX_JSON_DEPTH`` are rejected.
    """
    if value is None or isinstance(value, (str, int, float, bool)):
        return True
    if depth >= _MAX_JSON_DEPTH:
        return False
    if isinstance(value, (list, tuple)):
        return all(_is_json_safe(item, depth + 1) for item in value)
    if isinstance(value, dict):
        return all(
            (k is None or isinstance(k, (str, int, float, bool)))
            and _is_json_safe(v, depth + 1)
            for k, v in value.items()
        )
    return False


def _resolve_type(note_d: Dict[str, Any]) -> str:
    """Resolve the note type: frontmatter, then top-level, then ``meta``; default "concept"."""
    t = _frontmatter(note_d).get("type") or note_d.get("type")
    if not t and isinstance(note_d.get("meta"), dict):
        t = note_d["meta"].get("type")
    return str(t or "concept")


def _resolve_title(note_d: Dict[str, Any]) -> str:
    """Resolve the title from frontmatter or top-level; empty string if absent."""
    t = _frontmatter(note_d).get("title") or note_d.get("title")
    return str(t or "")


def _resolve_note_id(note_d: Dict[str, Any]) -> Optional[str]:
    """Return the first non-empty string found under "note_id" or "id", else None."""
    for k in ("note_id", "id"):
        v = note_d.get(k)
        if isinstance(v, str) and v:
            return v
    return None


def _resolve_body(note_d: Dict[str, Any]) -> str:
    """Return the first non-blank string found under "body", "text" or "content"."""
    for k in ("body", "text", "content"):
        v = note_d.get(k)
        if isinstance(v, str) and v.strip():
            return v
    return ""


def _resolve_defaults_for_type(types_cfg: Dict[str, Any], typ: str) -> Dict[str, Any]:
    """Return the defaults dict configured for *typ*, or ``{}`` if there is none."""
    if not isinstance(types_cfg, dict):
        return {}
    types_section = types_cfg.get("types")
    if not isinstance(types_section, dict):
        return {}
    t = types_section.get(typ) or {}
    return t if isinstance(t, dict) else {}


def _compute_retriever_weight(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> float:
    """Resolve the retriever weight: frontmatter > type defaults > ENV > 1.0.

    A level whose value is ``None`` is skipped. A level that is set but cannot
    be converted to a finite float yields 1.0.
    """
    # 1) Frontmatter
    fm_val = _frontmatter(note_d).get("retriever_weight")
    if fm_val is not None:
        return _coerce_float(fm_val, 1.0)
    # 2) Type defaults
    type_val = _resolve_defaults_for_type(types_cfg, typ).get("retriever_weight")
    if type_val is not None:
        return _coerce_float(type_val, 1.0)
    # 3) ENV
    envv = os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT")
    if envv:
        return _coerce_float(envv, 1.0)
    # 4) Fallback
    return 1.0


def _compute_chunk_profile(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> str:
    """Resolve the chunk profile: frontmatter > type defaults > ENV > "medium".

    A level whose value is ``None`` is skipped, so a YAML ``null`` never turns
    into the literal string "None".
    """
    fm_val = _frontmatter(note_d).get("chunk_profile")
    if fm_val is not None:
        return str(fm_val)
    type_val = _resolve_defaults_for_type(types_cfg, typ).get("chunk_profile")
    if type_val is not None:
        return str(type_val)
    envv = os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE")
    if envv:
        return str(envv)
    return "medium"


def _compute_edge_defaults(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> List[str]:
    """Resolve the default edge kinds: frontmatter > type defaults > ``[]``.

    A level whose value is ``None`` is skipped; an explicit empty list in the
    frontmatter still overrides the type defaults.
    """
    fm_val = _frontmatter(note_d).get("edge_defaults")
    if fm_val is not None:
        return _ensure_str_list(fm_val)
    type_val = _resolve_defaults_for_type(types_cfg, typ).get("edge_defaults")
    if type_val is not None:
        return _ensure_str_list(type_val)
    return []


# ------------------------------
# Public API
# ------------------------------

def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]:
    """Build the payload dict for a note.

    Args:
        note: A dict or a ParsedNote-like object.
        *args: Accepted for backward compatibility and ignored.
        **kwargs: Recognized keys (all others are ignored):
            - types_config: already loaded config (dict with a "types" key)
            - search_root / vault_root: directory in which config files are
              looked up

    Returns:
        A dict with "type", "title", "retriever_weight", "chunk_profile" and
        "edge_defaults"; plus "note_id" and "body_preview" when available, and
        every remaining JSON-serializable frontmatter entry under the key
        ``fm_<name>``.
    """
    note_d = _as_dict(note)

    # Locate the configuration.
    types_config = kwargs.get("types_config")
    search_root = kwargs.get("search_root") or kwargs.get("vault_root")
    types_cfg = _load_types_config(search_root, types_config)

    # Resolve the individual fields.
    typ = _resolve_type(note_d)
    title = _resolve_title(note_d)
    note_id = _resolve_note_id(note_d)
    body = _resolve_body(note_d)

    retriever_weight = _compute_retriever_weight(note_d, types_cfg, typ)
    chunk_profile = _compute_chunk_profile(note_d, types_cfg, typ)
    edge_defaults = _compute_edge_defaults(note_d, types_cfg, typ)

    # Assemble the payload (JSON-serializable types only).
    payload: Dict[str, Any] = {
        "type": typ,
        "title": title,
        "retriever_weight": float(retriever_weight),
        "chunk_profile": str(chunk_profile),
        "edge_defaults": edge_defaults,
    }
    if note_id:
        payload["note_id"] = note_id
    if body:
        payload["body_preview"] = body[:5000]  # preview only; the retriever works on chunks

    # Pass the remaining frontmatter through. Values that contain anything
    # other than JSON-serializable builtins (at any nesting depth) are skipped
    # so the payload always stays serializable.
    for k, v in _frontmatter(note_d).items():
        if k in _RESOLVED_FM_KEYS:
            continue
        if _is_json_safe(v):
            payload[f"fm_{k}"] = v

    return payload