"""
chunk_payload.py — Mindnet payload builder (Chunks)
Version: 1.3.0 (2025-11-09)

Purpose
-------
Build Qdrant-compatible JSON payloads for *chunks* of a parsed note.
Tolerant to different call signatures and accepts both dict-like and
object-like inputs.

Key features
------------
- Reads type defaults from `config/config.yaml` or `config/types.yaml`
  (same schema).
- Resolves fields with precedence: Frontmatter > type-defaults > ENV > fallback.
- Sets per chunk:
  * `note_id`, `note_title`, `type`
  * `retriever_weight` (finite float)
  * `chunk_profile` (short|medium|long)
  * `text` (falls back to the whole note body/text when the chunk has none)
  * `order`, `section`, `start`, `end`, `chunk_id` (if available)
- Keys whose value is None are omitted from the payload.
- Backwards-compatible signature: accepts **kwargs to swallow unknown args.

Input
-----
`parsed_note` may be:
- dict with keys: id, title, body/text, chunks(list), frontmatter(dict), type
- object with equivalent attributes

Each chunk may be dict-like or object-like with keys/attrs such as:
id, text, order, section, start, end
"""

from __future__ import annotations

import math
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

try:
    import yaml  # type: ignore
except Exception:  # pragma: no cover
    yaml = None


def _get(obj: Any, key: str, default: Any = None) -> Any:
    """Read *key* from a dict (item access) or any other object (attribute)."""
    if isinstance(obj, dict):
        return obj.get(key, default)
    return getattr(obj, key, default)


def _frontmatter(obj: Any) -> Dict[str, Any]:
    """Return the note's frontmatter dict, or an empty dict if absent/invalid."""
    fm = _get(obj, "frontmatter", {}) or {}
    return fm if isinstance(fm, dict) else {}


def _coerce_float(val: Any, default: float) -> float:
    """Convert *val* to a finite float, returning *default* when impossible.

    Accepts ints, floats and non-blank numeric strings. ``None``, blank
    strings, other types, unparsable strings and non-finite results
    (NaN / +-inf, which are not valid JSON numbers) all yield *default*.
    """
    if isinstance(val, str):
        val = val.strip()
        if not val:
            return default
    elif not isinstance(val, (int, float)):
        return default
    try:
        result = float(val)
    except (ValueError, OverflowError):
        # Unparsable string, or an int too large for a float.
        return default
    return result if math.isfinite(result) else default


def _normalize_chunk_profile(val: Any, fallback: str = "medium") -> str:
    """Return *val* lower-cased if it is short/medium/long, else *fallback*."""
    if not isinstance(val, str):
        return fallback
    v = val.strip().lower()
    if v in {"short", "medium", "long"}:
        return v
    return fallback


def _safe_text(s: Any) -> str:
    """Return *s* as a string; ``None`` becomes the empty string."""
    if s is None:
        return ""
    if isinstance(s, str):
        return s
    return str(s)


def _load_types_config(
    explicit_config: Optional[Dict[str, Any]] = None,
    search_root: Union[str, Path, None] = None,
) -> Dict[str, Any]:
    """Return a config dict that has a ``"types"`` mapping.

    Args:
        explicit_config: Used as-is (and returned unchanged) when it is a
            dict whose ``"types"`` value is a dict.
        search_root: Directory searched first for ``config/config.yaml`` and
            ``config/types.yaml``; the current working directory is always
            searched as well.

    Returns:
        ``explicit_config`` if usable, otherwise ``{"types": ...}`` from the
        first candidate file that parses to a dict with a dict ``types`` key,
        otherwise ``{"types": {}}`` (also when PyYAML is not installed).
    """
    if isinstance(explicit_config, dict) and isinstance(
        explicit_config.get("types"), dict
    ):
        return explicit_config

    if yaml is None:
        return {"types": {}}

    roots: List[Path] = [Path(search_root)] if search_root else []
    roots.append(Path.cwd())
    candidates = [
        root / "config" / name
        for root in roots
        for name in ("config.yaml", "types.yaml")
    ]

    # dict.fromkeys de-duplicates while keeping the search order, so the
    # cwd files are not probed twice when no search_root is given.
    for path in dict.fromkeys(candidates):
        try:
            if not path.exists():
                continue
            with path.open("r", encoding="utf-8") as f:
                loaded = yaml.safe_load(f) or {}
        except Exception:
            # Deliberately best-effort: an unreadable or malformed config
            # file must not prevent payload creation; try the next one.
            continue
        if isinstance(loaded, dict) and isinstance(loaded.get("types"), dict):
            return {"types": loaded["types"]}

    return {"types": {}}


def _type_defaults(note_type: str, cfg: Dict[str, Any]) -> Dict[str, Any]:
    """Return the defaults dict configured for *note_type*, always a dict.

    A missing type, a non-dict config, or a type entry that is not a
    mapping (e.g. an empty YAML key that parses to ``None``) yields ``{}``
    so callers can safely call ``.get`` on the result.
    """
    if not isinstance(cfg, dict):
        return {}
    types = cfg.get("types")
    if not isinstance(types, dict):
        return {}
    entry = types.get(note_type)
    return entry if isinstance(entry, dict) else {}


def make_chunk_payloads(
    parsed_note: Any,
    config: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> List[Dict[str, Any]]:
    """Build one payload dict per chunk of *parsed_note*.

    Args:
        parsed_note: Dict-like or object-like note (see module docstring).
        config: Optional pre-loaded config containing a ``"types"`` mapping;
            when absent or unusable the YAML config files are searched.
        **kwargs: Only ``search_root`` is read (directory to search for the
            config files); any other keyword is accepted and ignored.

    Returns:
        A list of payload dicts. If the note has no chunks, a single payload
        covering the whole body is produced. Keys whose value is ``None`` are
        left out of each payload.
    """
    search_root = kwargs.get("search_root")
    fm = _frontmatter(parsed_note)

    note_type = fm.get("type") or _get(parsed_note, "type") or "concept"
    note_type = str(note_type).strip().lower()

    cfg = _load_types_config(config, search_root)
    defaults = _type_defaults(note_type, cfg)

    # Resolve retriever_weight: FM > type-defaults > ENV > 1.0
    rw = fm.get("retriever_weight")
    if rw is None:
        rw = defaults.get("retriever_weight")
    if rw is None:
        rw = os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT")
    retriever_weight = _coerce_float(rw, 1.0)

    # Resolve chunk_profile: FM > type-defaults > ENV > medium
    cp = fm.get("chunk_profile")
    if cp is None:
        cp = defaults.get("chunk_profile")
    if cp is None:
        cp = os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE", "medium")
    chunk_profile = _normalize_chunk_profile(cp, "medium")

    note_id = _get(parsed_note, "id")
    note_title = _get(parsed_note, "title")
    body = _get(parsed_note, "body") or _get(parsed_note, "text") or ""

    items = _get(parsed_note, "chunks") or []
    if not items:
        # No chunks supplied: treat the whole note body as a single chunk.
        items = [{
            "id": f"{note_id}::0" if note_id else None,
            "text": body,
            "order": 0,
            "section": None,
            "start": 0,
            "end": len(body) if isinstance(body, str) else None,
        }]

    payloads: List[Dict[str, Any]] = []
    for ch in items:
        # A chunk without text falls back to the whole note body.
        text = _safe_text(_get(ch, "text")) or _safe_text(body)
        payload = {
            "note_id": note_id,
            "note_title": note_title,
            "type": note_type,
            "retriever_weight": retriever_weight,
            "chunk_profile": chunk_profile,
            "text": text,
            "order": _get(ch, "order"),
            "section": _get(ch, "section"),
            "start": _get(ch, "start"),
            "end": _get(ch, "end"),
            "chunk_id": _get(ch, "id"),
        }
        # Drop unset fields so the stored payload carries no null values.
        payloads.append({k: v for k, v in payload.items() if v is not None})

    return payloads