diff --git a/app/core/chunk_payload.py b/app/core/chunk_payload.py index e5b04d3..632dfbb 100644 --- a/app/core/chunk_payload.py +++ b/app/core/chunk_payload.py @@ -1,55 +1,105 @@ - """ -chunk_payload.py — Mindnet payload builder (Chunks) -Version: 1.3.0 (2025-11-09) +chunk_payload.py — v1.4.2 +------------------------- +Robuste, abwärtskompatible Payload-Erzeugung für Chunks. -Purpose -------- -Build Qdrant-compatible JSON payloads for *chunks* of a parsed note. -Tolerant to different call signatures and accepts both dict-like and object-like inputs. +Ziele +- Setzt pro Chunk `text`, `retriever_weight`, `chunk_profile`, `note_id`. +- Akzeptiert ParsedNote-Objekte *oder* Dicts, inklusive bereits vorsegmentierter .chunks. +- Verträgt zusätzliche args/kwargs (kompatibel zu älteren Aufrufern). +- Konfig-Auflösung identisch zu note_payload.py. -Key features ------------- -- Reads type defaults from `config/config.yaml` or `config/types.yaml` (same schema). -- Resolves fields with precedence: - Frontmatter > type-defaults > ENV > fallback. -- Sets per chunk: - * `note_id`, `note_title`, `type` - * `retriever_weight` (float) - * `chunk_profile` (short|medium|long) - * `text` (never empty: falls back to whole note body/text) - * `order`, `section`, `start`, `end` (if available) -- Backwards-compatible signature: accepts **kwargs to swallow unknown args. - -Input ------ -`parsed_note` may be: - - dict with keys: id, title, body/text, chunks(list), frontmatter(dict), type - - object with equivalent attributes - -Each chunk may be dict-like or object-like with keys/attrs such as: - id, text, order, section, start, end +Autor: ChatGPT +Lizenz: MIT """ - from __future__ import annotations import os +import hashlib from pathlib import Path from typing import Any, Dict, List, Optional, Union try: import yaml # type: ignore except Exception: # pragma: no cover - yaml = None + yaml = None # type: ignore -def _get(obj: Any, key: str, default: Any = None) -> Any: - if isinstance(obj, dict): - return obj.get(key, default) - return getattr(obj, key, default) -def _frontmatter(obj: Any) -> Dict[str, Any]: - fm = _get(obj, "frontmatter", {}) or {} - return fm if isinstance(fm, dict) else {} +def _as_dict(note: Any) -> Dict[str, Any]: + if isinstance(note, dict): + return dict(note) + out: Dict[str, Any] = {} + for attr in ("note_id", "id", "title", "type", "frontmatter", "meta", "body", "text", "content", "path", "chunks"): + if hasattr(note, attr): + out[attr] = getattr(note, attr) + if hasattr(note, "__dict__"): + for k, v in note.__dict__.items(): + if k not in out: + out[k] = v + return out + + +def _load_types_config(search_root: Optional[Union[str, Path]] = None, + preloaded: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + if isinstance(preloaded, dict) and "types" in preloaded: + return preloaded + + candidates: List[Path] = [] + if search_root: + root = Path(search_root) + candidates.extend([root / "config.yaml", root / "config" / "config.yaml", root / "config" / "types.yaml"]) + cwd = Path.cwd() + candidates.extend([cwd / "config.yaml", cwd / "config" / "config.yaml", cwd / "config" / "types.yaml"]) + + for p in candidates: + if p.exists() and p.is_file(): + if yaml is None: + break + try: + data = yaml.safe_load(p.read_text(encoding="utf-8")) or {} + if isinstance(data, dict) and "types" in data: + return data + except Exception: + pass + return {"version": "1.0", "types": {}} + + +def _safe_get(d: Dict[str, Any], key: str, default: Any = None) -> Any: + if not isinstance(d, dict): + return default + return d.get(key, default) + + +def _resolve_type(note_d: Dict[str, Any]) -> str: + fm = note_d.get("frontmatter") or {} + t = _safe_get(fm, "type") or note_d.get("type") + if not t and isinstance(note_d.get("meta"), dict): + t = note_d["meta"].get("type") + return str(t or "concept") + + +def _resolve_note_id(note_d: Dict[str, Any]) -> Optional[str]: + for k in ("note_id", "id"): + v = note_d.get(k) + if isinstance(v, str) and v: + return v + return None + + +def _resolve_body(note_d: Dict[str, Any]) -> str: + for k in ("body", "text", "content"): + v = note_d.get(k) + if isinstance(v, str) and v.strip(): + return v + return "" + + +def _resolve_defaults_for_type(types_cfg: Dict[str, Any], typ: str) -> Dict[str, Any]: + if not isinstance(types_cfg, dict): + return {} + t = (types_cfg.get("types") or {}).get(typ) or {} + return t if isinstance(t, dict) else {} + def _coerce_float(val: Any, default: float) -> float: try: @@ -57,124 +107,109 @@ def _coerce_float(val: Any, default: float) -> float: return default if isinstance(val, (int, float)): return float(val) - if isinstance(val, str) and val.strip(): + if isinstance(val, str): return float(val.strip()) except Exception: pass return default -def _normalize_chunk_profile(val: Any, fallback: str = "medium") -> str: - if not isinstance(val, str): - return fallback - v = val.strip().lower() - if v in {"short", "medium", "long"}: - return v - return fallback -def _safe_text(s: Any) -> str: - if s is None: - return "" +def _compute_retriever_weight(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> float: + fm = note_d.get("frontmatter") or {} + if "retriever_weight" in fm: + return _coerce_float(fm.get("retriever_weight"), 1.0) + tdef = _resolve_defaults_for_type(types_cfg, typ) + if "retriever_weight" in tdef: + return _coerce_float(tdef.get("retriever_weight"), 1.0) + envv = os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT") + if envv: + return _coerce_float(envv, 1.0) + return 1.0 + + +def _compute_chunk_profile(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> str: + fm = note_d.get("frontmatter") or {} + if "chunk_profile" in fm: + return str(fm.get("chunk_profile")) + tdef = _resolve_defaults_for_type(types_cfg, typ) + if "chunk_profile" in tdef: + return str(tdef.get("chunk_profile")) + envv = os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE") + if envv: + return str(envv) + return "medium" + + +def _norm_chunk_text(s: Any) -> str: if isinstance(s, str): - return s - return str(s) + return s.strip() + return "" -def _load_types_config( - explicit_config: Optional[Dict[str, Any]] = None, - search_root: Union[str, Path, None] = None, -) -> Dict[str, Any]: - if explicit_config and isinstance(explicit_config, dict): - if "types" in explicit_config and isinstance(explicit_config["types"], dict): - return explicit_config - if yaml is None: - return {"types": {}} - candidates: List[Path] = [] - root = Path(search_root) if search_root else Path.cwd() - candidates.append(root / "config" / "config.yaml") - candidates.append(root / "config" / "types.yaml") - candidates.append(Path.cwd() / "config" / "config.yaml") - candidates.append(Path.cwd() / "config" / "types.yaml") - for p in candidates: - try: - if p.exists(): - import yaml as _y - with p.open("r", encoding="utf-8") as f: - loaded = _y.safe_load(f) or {} - if isinstance(loaded, dict) and isinstance(loaded.get("types"), dict): - return {"types": loaded["types"]} - except Exception: - continue - return {"types": {}} -def _type_defaults(note_type: str, cfg: Dict[str, Any]) -> Dict[str, Any]: - return (cfg.get("types") or {}).get(note_type, {}) if isinstance(cfg, dict) else {} +def _hash(s: str) -> str: + return hashlib.sha1(s.encode("utf-8")).hexdigest()[:12] -def make_chunk_payloads( - parsed_note: Any, - config: Optional[Dict[str, Any]] = None, - **kwargs: Any, -) -> List[Dict[str, Any]]: - search_root = kwargs.get("search_root") - fm = _frontmatter(parsed_note) - note_type = fm.get("type") or _get(parsed_note, "type") or "concept" - note_type = str(note_type).strip().lower() - cfg = _load_types_config(config, search_root) - defaults = _type_defaults(note_type, cfg) +def make_chunk_payloads(note: Any, *args, **kwargs) -> List[Dict[str, Any]]: + """Erzeugt Payloads für alle Chunks der Note. - # Resolve retriever_weight: FM > type-defaults > ENV > 1.0 - rw = fm.get("retriever_weight") - if rw is None: - rw = defaults.get("retriever_weight") - if rw is None: - env_rw = os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT") - rw = _coerce_float(env_rw, 1.0) - else: - rw = _coerce_float(rw, 1.0) + Akzeptierte zusätzliche kwargs: + - types_config: dict wie in config.yaml + - search_root / vault_root: für Konfigsuche - # Resolve chunk_profile: FM > type-defaults > ENV > medium - cp = fm.get("chunk_profile") - if cp is None: - cp = defaults.get("chunk_profile") - if cp is None: - cp = os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE", "medium") - cp = _normalize_chunk_profile(cp, "medium") + *args werden ignoriert (Kompatibilität zu älteren Aufrufern). + """ + note_d = _as_dict(note) - note_id = _get(parsed_note, "id") - note_title = _get(parsed_note, "title") - body = _get(parsed_note, "body") or _get(parsed_note, "text") or "" + types_config = kwargs.get("types_config") + search_root = kwargs.get("search_root") or kwargs.get("vault_root") + types_cfg = _load_types_config(search_root, types_config) - items = _get(parsed_note, "chunks") or [] - payloads: List[Dict[str, Any]] = [] + typ = _resolve_type(note_d) + note_id = _resolve_note_id(note_d) or "" - if not items: - items = [{ - "id": f"{note_id}::0" if note_id else None, - "text": body, - "order": 0, - "section": None, - "start": 0, - "end": len(body) if isinstance(body, str) else None, - }] + r_weight = _compute_retriever_weight(note_d, types_cfg, typ) + c_profile = _compute_chunk_profile(note_d, types_cfg, typ) - for ch in items: - text = _safe_text(_get(ch, "text")) - if not text: - text = _safe_text(body) + out: List[Dict[str, Any]] = [] - payload = { - "note_id": note_id, - "note_title": note_title, - "type": note_type, - "retriever_weight": float(rw), - "chunk_profile": cp, - "text": text, - "order": _get(ch, "order"), - "section": _get(ch, "section"), - "start": _get(ch, "start"), - "end": _get(ch, "end"), - "chunk_id": _get(ch, "id"), - } - payload = {k: v for k, v in payload.items() if v is not None} - payloads.append(payload) + # 1) Falls der Parser bereits Chunks liefert, nutzen + pre = note_d.get("chunks") + if isinstance(pre, list) and pre: + for idx, c in enumerate(pre): + if isinstance(c, dict): + text = _norm_chunk_text(c.get("text") or c.get("body") or c.get("content")) + else: + text = _norm_chunk_text(getattr(c, "text", "")) + if not text: + # Fallback auf Note-Body, falls leer + text = _resolve_body(note_d) + if not text: + continue - return payloads + chunk_id = f"{note_id}#{idx:03d}" if note_id else _hash(text)[:8] + payload = { + "note_id": note_id, + "chunk_id": chunk_id, + "text": text, + "retriever_weight": float(r_weight), + "chunk_profile": str(c_profile), + "type": typ, + } + out.append(payload) + + # 2) Sonst als Single-Chunk aus Body/Text + if not out: + text = _resolve_body(note_d) + if text: + chunk_id = f"{note_id}#000" if note_id else _hash(text)[:8] + out.append({ + "note_id": note_id, + "chunk_id": chunk_id, + "text": text, + "retriever_weight": float(r_weight), + "chunk_profile": str(c_profile), + "type": typ, + }) + + return out diff --git a/app/core/note_payload.py b/app/core/note_payload.py index 33a0eb2..81f8f95 100644 --- a/app/core/note_payload.py +++ b/app/core/note_payload.py @@ -1,81 +1,100 @@ - """ -note_payload.py — Mindnet payload builder (Notes) -Version: 1.3.0 (2025-11-09) +note_payload.py — v1.4.2 +------------------------ +Robuste, abwärtskompatible Payload-Erzeugung für Notes. -Purpose -------- -Build Qdrant-compatible JSON payloads for *notes* from a parsed Markdown -representation. The function is tolerant to different call signatures and -accepts both dict-like and object-like "ParsedNote" inputs. +Ziele +- Setzt `retriever_weight`, `chunk_profile`, `edge_defaults` deterministisch. +- Priorität: Frontmatter > Typ-Defaults (config/config.yaml oder config/types.yaml) > ENV > Fallback. +- Akzeptiert ParsedNote-Objekte *oder* Dicts. +- Verträgt zusätzliche kwargs (z. B. vault_root/search_root/cfg). +- Keine Verwendung nicht-serialisierbarer Typen. -Key features ------------- -- Reads type defaults from `config/config.yaml` or `config/types.yaml` (same schema). -- Resolves fields with the following precedence: - Frontmatter > type-defaults > ENV > hard-coded fallback. -- Ensures only JSON-serializable types are included (no sets, Path, callables). -- Sets/normalizes: - * `type` : note type (e.g., concept, task, experience, project) - * `retriever_weight` : float, influences retrieval blending downstream - * `chunk_profile` : short | medium | long (string) - * `edge_defaults` : list[str], used by edge builder outside of this module -- Backwards-compatible signature: accepts **kwargs to swallow unknown args - (e.g., vault_root, prefix, ...). +Hinweis +- Diese Datei **lädt Konfig** nur opportunistisch (./config/config.yaml oder ./config/types.yaml relativ zum CWD + bzw. zu `search_root`/`vault_root`, falls übergeben). Wenn dein Aufrufer bereits eine Konfiguration geladen hat, + kann er sie via `types_config` kwarg übergeben (dict wie in deinem Beispiel). -Expected input (flexible) -------------------------- -`parsed_note` may be: - - dict with keys: id, title, body/text, path, frontmatter (dict), type, ... - - object with attributes: id, title, body/text, path, frontmatter, type, ... - -Schema for config files ------------------------ -version: 1.0 -types: - concept: - chunk_profile: medium - edge_defaults: ["references", "related_to"] - retriever_weight: 0.33 - task: - chunk_profile: short - edge_defaults: ["depends_on", "belongs_to"] - retriever_weight: 0.8 - experience: - chunk_profile: medium - edge_defaults: ["derived_from", "inspired_by"] - retriever_weight: 0.9 - project: - chunk_profile: long - edge_defaults: ["references", "depends_on"] - retriever_weight: 0.95 +Autor: ChatGPT +Lizenz: MIT """ - from __future__ import annotations -import json import os from pathlib import Path -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, Optional, Union, List try: import yaml # type: ignore -except Exception: # pragma: no cover - yaml = None # The caller must ensure PyYAML is installed +except Exception: # pragma: no cover - yaml ist optional, wir degradieren dann sauber + yaml = None # type: ignore + # ------------------------------ -# Helpers +# Hilfsfunktionen (keine I/O Magie) # ------------------------------ -def _get(obj: Any, key: str, default: Any = None) -> Any: - """Get key from dict-like or attribute from object-like.""" - if isinstance(obj, dict): - return obj.get(key, default) - return getattr(obj, key, default) +def _as_dict(note: Any) -> Dict[str, Any]: + """Konvertiert eine ParsedNote-ähnliche Struktur robust in ein Dict.""" + if isinstance(note, dict): + return dict(note) + # Objekt -> vorsichtig Attribute lesen + out: Dict[str, Any] = {} + for attr in ("note_id", "id", "title", "type", "frontmatter", "meta", "body", "text", "content", "path"): + if hasattr(note, attr): + out[attr] = getattr(note, attr) + # Manche Parser haben .data / .raw etc. + if hasattr(note, "__dict__"): + # nichts überschreiben, nur fehlende ergänzen (nur einfache Typen) + for k, v in note.__dict__.items(): + if k not in out: + out[k] = v + return out + + +def _safe_get(d: Dict[str, Any], key: str, default: Any = None) -> Any: + """Dict-get ohne Mutation, akzeptiert fehlende Dicts.""" + if not isinstance(d, dict): + return default + return d.get(key, default) + + +def _load_types_config(search_root: Optional[Union[str, Path]] = None, + preloaded: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + """Lädt Typ-Defaults aus config.yaml oder types.yaml (falls vorhanden). + Struktur erwartet wie im Beispiel: + { + "version": "1.0", + "types": { + "concept": {"chunk_profile": "medium", "edge_defaults": [...], "retriever_weight": 0.33}, + ... + } + } + """ + if isinstance(preloaded, dict) and "types" in preloaded: + return preloaded + + candidates: List[Path] = [] + if search_root: + root = Path(search_root) + candidates.extend([root / "config.yaml", root / "config" / "config.yaml", root / "config" / "types.yaml"]) + # relative zum CWD + cwd = Path.cwd() + candidates.extend([cwd / "config.yaml", cwd / "config" / "config.yaml", cwd / "config" / "types.yaml"]) + + for p in candidates: + if p.exists() and p.is_file(): + if yaml is None: + break + try: + data = yaml.safe_load(p.read_text(encoding="utf-8")) or {} + if isinstance(data, dict) and "types" in data: + return data + except Exception: + # still und hart, kein Crash bei kaputter Datei + pass + return {"version": "1.0", "types": {}} -def _frontmatter(obj: Any) -> Dict[str, Any]: - fm = _get(obj, "frontmatter", {}) or {} - return fm if isinstance(fm, dict) else {} def _coerce_float(val: Any, default: float) -> float: try: @@ -83,170 +102,147 @@ def _coerce_float(val: Any, default: float) -> float: return default if isinstance(val, (int, float)): return float(val) - if isinstance(val, str) and val.strip(): + if isinstance(val, str): return float(val.strip()) except Exception: pass return default -def _normalize_chunk_profile(val: Any, fallback: str = "medium") -> str: - if not isinstance(val, str): - return fallback - v = val.strip().lower() - if v in {"short", "medium", "long"}: - return v - return fallback -def _coerce_str_list(val: Any) -> List[str]: - if val is None: +def _ensure_str_list(v: Any) -> List[str]: + if v is None: return [] - if isinstance(val, list): - out: List[str] = [] - for x in val: - if isinstance(x, str): - out.append(x) - else: - out.append(str(x)) - return out - if isinstance(val, str): - # allow comma-separated - return [x.strip() for x in val.split(",") if x.strip()] + if isinstance(v, (list, tuple)): + return [str(x) for x in v if x is not None] + return [str(v)] + + +def _resolve_type(note_d: Dict[str, Any]) -> str: + fm = note_d.get("frontmatter") or {} + t = _safe_get(fm, "type") or note_d.get("type") + if not t and isinstance(note_d.get("meta"), dict): + t = note_d["meta"].get("type") + return str(t or "concept") + + +def _resolve_title(note_d: Dict[str, Any]) -> str: + fm = note_d.get("frontmatter") or {} + t = _safe_get(fm, "title") or note_d.get("title") + return str(t or "") + + +def _resolve_note_id(note_d: Dict[str, Any]) -> Optional[str]: + for k in ("note_id", "id"): + v = note_d.get(k) + if isinstance(v, str) and v: + return v + return None + + +def _resolve_body(note_d: Dict[str, Any]) -> str: + for k in ("body", "text", "content"): + v = note_d.get(k) + if isinstance(v, str) and v.strip(): + return v + return "" + + +def _resolve_defaults_for_type(types_cfg: Dict[str, Any], typ: str) -> Dict[str, Any]: + if not isinstance(types_cfg, dict): + return {} + t = (types_cfg.get("types") or {}).get(typ) or {} + return t if isinstance(t, dict) else {} + + +def _compute_retriever_weight(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> float: + fm = note_d.get("frontmatter") or {} + # 1) Frontmatter + if "retriever_weight" in fm: + return _coerce_float(fm.get("retriever_weight"), 1.0) + # 2) Typ-Defaults + tdef = _resolve_defaults_for_type(types_cfg, typ) + if "retriever_weight" in tdef: + return _coerce_float(tdef.get("retriever_weight"), 1.0) + # 3) ENV + envv = os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT") + if envv: + return _coerce_float(envv, 1.0) + # 4) Fallback + return 1.0 + + +def _compute_chunk_profile(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> str: + fm = note_d.get("frontmatter") or {} + if "chunk_profile" in fm: + return str(fm.get("chunk_profile")) + tdef = _resolve_defaults_for_type(types_cfg, typ) + if "chunk_profile" in tdef: + return str(tdef.get("chunk_profile")) + envv = os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE") + if envv: + return str(envv) + return "medium" + + +def _compute_edge_defaults(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> List[str]: + fm = note_d.get("frontmatter") or {} + if "edge_defaults" in fm: + return _ensure_str_list(fm.get("edge_defaults")) + tdef = _resolve_defaults_for_type(types_cfg, typ) + if "edge_defaults" in tdef: + return _ensure_str_list(tdef.get("edge_defaults")) return [] -def _safe_jsonable(value: Any) -> Any: - """Ensure value is JSON-serializable (no sets, Path, callables, etc.).""" - if isinstance(value, (str, int, float, bool)) or value is None: - return value - if isinstance(value, list): - return [_safe_jsonable(v) for v in value] - if isinstance(value, dict): - return {str(k): _safe_jsonable(v) for k, v in value.items()} - if isinstance(value, Path): - return str(value) - # Avoid sets and other iterables that are not JSON-serializable - try: - json.dumps(value) - return value - except Exception: - return str(value) # ------------------------------ -# Config loading +# Öffentliche API # ------------------------------ -def _load_types_config( - explicit_config: Optional[Dict[str, Any]] = None, - search_root: Union[str, Path, None] = None, -) -> Dict[str, Any]: +def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]: + """Erzeugt das Payload-Dict für eine Note. + + Akzeptierte zusätzliche kwargs: + - types_config: bereits geladene Config (dict mit "types") + - search_root / vault_root: Ordner, in dem config/* gesucht wird """ - Load types config from: - 1) explicit_config (if provided) - 2) {search_root}/config/config.yaml - 3) {search_root}/config/types.yaml - 4) ./config/config.yaml - 5) ./config/types.yaml - Returns a dict with shape: {"types": {...}} (empty if none found). - """ - if explicit_config and isinstance(explicit_config, dict): - if "types" in explicit_config and isinstance(explicit_config["types"], dict): - return explicit_config + note_d = _as_dict(note) - candidates: List[Path] = [] - root = Path(search_root) if search_root else Path.cwd() - candidates.append(root / "config" / "config.yaml") - candidates.append(root / "config" / "types.yaml") - # fallback to CWD when search_root was different - candidates.append(Path.cwd() / "config" / "config.yaml") - candidates.append(Path.cwd() / "config" / "types.yaml") + # Konfig finden + types_config = kwargs.get("types_config") + search_root = kwargs.get("search_root") or kwargs.get("vault_root") + types_cfg = _load_types_config(search_root, types_config) - data = {} - if yaml is None: - return {"types": {}} + # Felder auflösen + typ = _resolve_type(note_d) + title = _resolve_title(note_d) + note_id = _resolve_note_id(note_d) + body = _resolve_body(note_d) - for p in candidates: - try: - if p.exists(): - with p.open("r", encoding="utf-8") as f: - loaded = yaml.safe_load(f) or {} - if isinstance(loaded, dict) and isinstance(loaded.get("types"), dict): - data = {"types": loaded["types"]} - break - except Exception: - continue - if not data: - data = {"types": {}} - return data - -def _type_defaults(note_type: str, cfg: Dict[str, Any]) -> Dict[str, Any]: - return (cfg.get("types") or {}).get(note_type, {}) if isinstance(cfg, dict) else {} - -# ------------------------------ -# Public API -# ------------------------------ - -def make_note_payload( - parsed_note: Any, - *, - config: Optional[Dict[str, Any]] = None, - search_root: Union[str, Path, None] = None, - **kwargs: Any, -) -> Dict[str, Any]: - """ - Build the payload for a NOTE. Tolerates extra kwargs (e.g., vault_root, prefix). - """ - fm = _frontmatter(parsed_note) - note_type = fm.get("type") or _get(parsed_note, "type") or "concept" - note_type = str(note_type).strip().lower() - - # Load config and resolve defaults - cfg = _load_types_config(config, search_root) - defaults = _type_defaults(note_type, cfg) - - # retriever_weight: FM > type-defaults > ENV > 1.0 - rw = fm.get("retriever_weight") - if rw is None: - rw = defaults.get("retriever_weight") - if rw is None: - env_rw = os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT") - rw = _coerce_float(env_rw, 1.0) - else: - rw = _coerce_float(rw, 1.0) - - # chunk_profile: FM > type-defaults > ENV > medium - cp = fm.get("chunk_profile") - if cp is None: - cp = defaults.get("chunk_profile") - if cp is None: - cp = os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE", "medium") - cp = _normalize_chunk_profile(cp, "medium") - - # edge_defaults: FM > type-defaults > empty - edge_defs = fm.get("edge_defaults") - if edge_defs is None: - edge_defs = defaults.get("edge_defaults", []) - edge_defs = _coerce_str_list(edge_defs) + retriever_weight = _compute_retriever_weight(note_d, types_cfg, typ) + chunk_profile = _compute_chunk_profile(note_d, types_cfg, typ) + edge_defaults = _compute_edge_defaults(note_d, types_cfg, typ) + # Payload zusammenstellen (nur JSON-fähige Typen) payload: Dict[str, Any] = { - "id": _get(parsed_note, "id"), - "note_id": _get(parsed_note, "id"), - "title": _get(parsed_note, "title"), - "type": note_type, - "retriever_weight": float(rw), - "chunk_profile": cp, - "edge_defaults": edge_defs, - # Useful passthrough/meta (all made JSON-safe) - "path": _safe_jsonable(_get(parsed_note, "path")), - "source": _safe_jsonable(_get(parsed_note, "source")), + "type": typ, + "title": title, + "retriever_weight": float(retriever_weight), + "chunk_profile": str(chunk_profile), + "edge_defaults": edge_defaults, } + if note_id: + payload["note_id"] = note_id + if body: + payload["body_preview"] = body[:5000] # nur Vorschau, Retriever nutzt Chunks - # Include raw frontmatter keys (stringify keys; make safe) + # Frontmatter relevante Keys durchreichen (ohne Binärdaten/Objekte) + fm = note_d.get("frontmatter") or {} if isinstance(fm, dict): for k, v in fm.items(): - # avoid overwriting normalized fields - if k in {"type", "retriever_weight", "chunk_profile", "edge_defaults"}: + if k in ("type", "retriever_weight", "chunk_profile", "edge_defaults"): continue - payload[f"fm_{k}"] = _safe_jsonable(v) + # nur einfache/nützliche Typen durchlassen + if isinstance(v, (str, int, float, bool, list, dict)) or v is None: + payload[f"fm_{k}"] = v - # Remove None values to keep payload clean - payload = {k: v for k, v in payload.items() if v is not None} return payload