"""
note_payload.py — Mindnet payload builder (Notes)
Version: 1.3.0 (2025-11-09)

Purpose
-------
Build Qdrant-compatible JSON payloads for *notes* from a parsed Markdown
representation. The function is tolerant to different call signatures and
accepts both dict-like and object-like "ParsedNote" inputs.

Key features
------------
- Reads type defaults from `config/config.yaml` or `config/types.yaml`
  (same schema).
- Resolves fields with the following precedence:
  Frontmatter > type-defaults > ENV > hard-coded fallback.
  A candidate that fails validation (e.g. a non-numeric or non-finite
  `retriever_weight`, an unknown `chunk_profile`) is skipped, so the next
  source in the chain is consulted instead of jumping to the fallback.
- Ensures only JSON-serializable types are included (no sets, Path,
  callables).
- Sets/normalizes:
    * `type`             : note type (e.g., concept, task, experience, project)
    * `retriever_weight` : float, influences retrieval blending downstream
    * `chunk_profile`    : short | medium | long (string)
    * `edge_defaults`    : list[str], used by edge builder outside of this module
- Backwards-compatible signature: accepts **kwargs to swallow unknown args
  (e.g., vault_root, prefix, ...).

Expected input (flexible)
-------------------------
`parsed_note` may be:
- dict with keys: id, title, body/text, path, frontmatter (dict), type, ...
- object with attributes: id, title, body/text, path, frontmatter, type, ...

Schema for config files
-----------------------
version: 1.0
types:
  concept:
    chunk_profile: medium
    edge_defaults: ["references", "related_to"]
    retriever_weight: 0.33
  task:
    chunk_profile: short
    edge_defaults: ["depends_on", "belongs_to"]
    retriever_weight: 0.8
  experience:
    chunk_profile: medium
    edge_defaults: ["derived_from", "inspired_by"]
    retriever_weight: 0.9
  project:
    chunk_profile: long
    edge_defaults: ["references", "depends_on"]
    retriever_weight: 0.95
"""

from __future__ import annotations

import json
import math
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

try:
    import yaml  # type: ignore
except Exception:  # pragma: no cover
    yaml = None  # The caller must ensure PyYAML is installed

# Accepted values for `chunk_profile`.
_CHUNK_PROFILES = frozenset({"short", "medium", "long"})

# Frontmatter keys that are emitted in normalized form and therefore must not
# be duplicated as raw `fm_<key>` entries.
_NORMALIZED_KEYS = frozenset(
    {"type", "retriever_weight", "chunk_profile", "edge_defaults"}
)


# ------------------------------
# Helpers
# ------------------------------

def _get(obj: Any, key: str, default: Any = None) -> Any:
    """Get key from dict-like or attribute from object-like."""
    if isinstance(obj, dict):
        return obj.get(key, default)
    return getattr(obj, key, default)


def _frontmatter(obj: Any) -> Dict[str, Any]:
    """Return the note's frontmatter as a dict ({} if missing or not a dict)."""
    fm = _get(obj, "frontmatter", {}) or {}
    return fm if isinstance(fm, dict) else {}


def _try_float(val: Any) -> Optional[float]:
    """Return *val* as a finite float, or None when it is not usable.

    Accepts ints, floats and non-blank numeric strings. NaN and +/-inf are
    rejected because they are not valid JSON numbers.
    """
    if isinstance(val, str):
        val = val.strip()
        if not val:
            return None
    elif not isinstance(val, (int, float)):
        return None
    try:
        number = float(val)
    except (ValueError, OverflowError):
        return None
    return number if math.isfinite(number) else None


def _coerce_float(val: Any, default: float) -> float:
    """Convert *val* to a finite float, returning *default* when impossible."""
    number = _try_float(val)
    return default if number is None else number


def _try_chunk_profile(val: Any) -> Optional[str]:
    """Return the normalized chunk profile, or None when *val* is not valid."""
    if not isinstance(val, str):
        return None
    profile = val.strip().lower()
    return profile if profile in _CHUNK_PROFILES else None


def _normalize_chunk_profile(val: Any, fallback: str = "medium") -> str:
    """Normalize *val* to short | medium | long, else return *fallback*."""
    profile = _try_chunk_profile(val)
    return fallback if profile is None else profile


def _coerce_str_list(val: Any) -> List[str]:
    """Convert *val* to a list of strings.

    - None                -> []
    - str                 -> comma-separated items, stripped, blanks dropped
    - list / tuple        -> each item stringified, order preserved
    - set / frozenset     -> each item stringified, sorted for stable output
    - anything else       -> []
    """
    if val is None:
        return []
    if isinstance(val, str):
        # allow comma-separated
        return [part.strip() for part in val.split(",") if part.strip()]
    if isinstance(val, (list, tuple)):
        return [str(item) for item in val]
    if isinstance(val, (set, frozenset)):
        return sorted(str(item) for item in val)
    return []


def _safe_jsonable(value: Any) -> Any:
    """Ensure value is JSON-serializable (no sets, Path, callables, etc.).

    Containers are converted recursively: dict keys are stringified, tuples
    become lists and sets become sorted lists. Anything the stock JSON encoder
    cannot handle is replaced by its `str()` representation.
    """
    if value is None or isinstance(value, (str, int, float, bool)):
        return value
    if isinstance(value, dict):
        return {str(k): _safe_jsonable(v) for k, v in value.items()}
    if isinstance(value, (list, tuple)):
        return [_safe_jsonable(v) for v in value]
    if isinstance(value, (set, frozenset)):
        # Sets have no stable iteration order; sort on repr so that mixed
        # element types still yield a deterministic list.
        return sorted((_safe_jsonable(v) for v in value), key=repr)
    if isinstance(value, Path):
        return str(value)
    try:
        json.dumps(value)
    except (TypeError, ValueError, OverflowError):
        return str(value)
    return value


# ------------------------------
# Config loading
# ------------------------------

def _load_types_config(
    explicit_config: Optional[Dict[str, Any]] = None,
    search_root: Union[str, Path, None] = None,
) -> Dict[str, Any]:
    """
    Load types config from:
      1) explicit_config (if provided and it contains a dict under "types")
      2) {search_root}/config/config.yaml
      3) {search_root}/config/types.yaml
      4) ./config/config.yaml
      5) ./config/types.yaml
    Returns a dict with shape: {"types": {...}} (empty if none found).

    An accepted explicit_config is returned as-is (same object). Files that
    cannot be read or parsed, or that lack a dict under "types", are skipped.
    Without PyYAML no file is read.
    """
    if isinstance(explicit_config, dict) and isinstance(
        explicit_config.get("types"), dict
    ):
        return explicit_config

    if yaml is None:
        return {"types": {}}

    # search_root first, then CWD as fallback when search_root was different
    roots: List[Path] = [Path(search_root)] if search_root else []
    roots.append(Path.cwd())

    seen = set()
    for root in roots:
        for filename in ("config.yaml", "types.yaml"):
            candidate = root / "config" / filename
            if candidate in seen:
                # search_root unset or equal to CWD: do not probe twice
                continue
            seen.add(candidate)
            try:
                if not candidate.is_file():
                    continue
                with candidate.open("r", encoding="utf-8") as fh:
                    loaded = yaml.safe_load(fh) or {}
            except Exception:
                # Deliberate best-effort: an unreadable or malformed file must
                # not prevent payload creation; try the next candidate.
                continue
            if isinstance(loaded, dict) and isinstance(loaded.get("types"), dict):
                return {"types": loaded["types"]}

    return {"types": {}}


def _type_defaults(note_type: str, cfg: Dict[str, Any]) -> Dict[str, Any]:
    """Return the defaults dict for *note_type*; always a dict.

    A type that is missing, or whose config entry is empty/None or otherwise
    not a mapping (e.g. `task:` with no body in YAML), yields {}.
    """
    if not isinstance(cfg, dict):
        return {}
    types = cfg.get("types")
    if not isinstance(types, dict):
        return {}
    entry = types.get(note_type)
    return entry if isinstance(entry, dict) else {}


def _resolve_retriever_weight(fm: Dict[str, Any], defaults: Dict[str, Any]) -> float:
    """Pick the first valid weight: FM > type-defaults > ENV > 1.0."""
    candidates = (
        fm.get("retriever_weight"),
        defaults.get("retriever_weight"),
        os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT"),
    )
    for candidate in candidates:
        weight = _try_float(candidate)
        if weight is not None:
            return weight
    return 1.0


def _resolve_chunk_profile(fm: Dict[str, Any], defaults: Dict[str, Any]) -> str:
    """Pick the first valid profile: FM > type-defaults > ENV > medium."""
    candidates = (
        fm.get("chunk_profile"),
        defaults.get("chunk_profile"),
        os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE"),
    )
    for candidate in candidates:
        profile = _try_chunk_profile(candidate)
        if profile is not None:
            return profile
    return "medium"


# ------------------------------
# Public API
# ------------------------------

def make_note_payload(
    parsed_note: Any,
    *,
    config: Optional[Dict[str, Any]] = None,
    search_root: Union[str, Path, None] = None,
    **kwargs: Any,
) -> Dict[str, Any]:
    """
    Build the payload for a NOTE.
    Tolerates extra kwargs (e.g., vault_root, prefix).

    Args:
        parsed_note: dict-like or object-like note (see module docstring).
        config: optional pre-loaded config; used when it holds a dict under
            "types", otherwise the config files are searched.
        search_root: directory whose `config/` folder is searched first.
        **kwargs: accepted and ignored, for signature compatibility.

    Returns:
        A dict with id, note_id, title, type, retriever_weight, chunk_profile,
        edge_defaults, path, source and one `fm_<key>` entry per remaining
        frontmatter key. Entries whose value is None are omitted.
    """
    fm = _frontmatter(parsed_note)

    raw_type = fm.get("type") or _get(parsed_note, "type") or "concept"
    # A whitespace-only type would normalize to ""; treat it as unset.
    note_type = str(raw_type).strip().lower() or "concept"

    # Load config and resolve defaults
    defaults = _type_defaults(note_type, _load_types_config(config, search_root))

    # edge_defaults: FM > type-defaults > empty. An explicit empty list in the
    # frontmatter is a valid override, so only None falls through.
    edge_defs = fm.get("edge_defaults")
    if edge_defs is None:
        edge_defs = defaults.get("edge_defaults")

    note_id = _safe_jsonable(_get(parsed_note, "id"))
    payload: Dict[str, Any] = {
        "id": note_id,
        "note_id": note_id,
        "title": _safe_jsonable(_get(parsed_note, "title")),
        "type": note_type,
        "retriever_weight": _resolve_retriever_weight(fm, defaults),
        "chunk_profile": _resolve_chunk_profile(fm, defaults),
        "edge_defaults": _coerce_str_list(edge_defs),
        # Useful passthrough/meta (all made JSON-safe)
        "path": _safe_jsonable(_get(parsed_note, "path")),
        "source": _safe_jsonable(_get(parsed_note, "source")),
    }

    # Include raw frontmatter keys (stringify keys; make safe)
    for key, value in fm.items():
        # avoid overwriting normalized fields
        if key in _NORMALIZED_KEYS:
            continue
        payload[f"fm_{key}"] = _safe_jsonable(value)

    # Remove None values to keep payload clean
    return {k: v for k, v in payload.items() if v is not None}