"""
chunk_payload.py — mindnet core payload builders
Version: 1.3.1 (2025-11-08)

Purpose
-------
Build robust chunk payloads for Qdrant upserts. This function is intentionally
flexible about its signature to remain compatible with different callers.

Contract
--------
make_chunk_payloads(note, chunks, *args, **kwargs) -> List[Dict[str, Any]]

Each returned item contains at least:
- note_id (str)
- title (str)
- type (str)
- path (str or None)
- tags (List[str])
- chunk_index (int)
- text (str)
- retriever_weight (float or None)  # if available
"""
from __future__ import annotations

from pathlib import Path
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Union


def _get(obj: Any, key: str, default: Any = None) -> Any:
    """Read ``key`` from a mapping or from an attribute-style object.

    Returns ``default`` when ``obj`` is None or the key/attribute is absent.
    """
    if obj is None:
        return default
    if isinstance(obj, Mapping):
        return obj.get(key, default)
    return getattr(obj, key, default)


def _get_frontmatter(note: Any) -> Mapping[str, Any]:
    """Return the note's ``frontmatter`` if it is a mapping, else an empty dict."""
    fm = _get(note, "frontmatter", {})
    return fm if isinstance(fm, Mapping) else {}


def _resolve_retriever_weight(explicit: Any, fm: Mapping[str, Any]) -> Optional[float]:
    """Pick the retriever weight from the first source that defines one.

    Precedence: the explicit override, then ``fm["retriever_weight"]``, then
    ``fm["retriever"]["weight"]``. The chosen value is converted with
    ``float()``; a value that cannot be converted yields None (there is no
    fall-through to the next source).
    """

    def to_float(v: Any) -> Optional[float]:
        if v is None:
            return None
        try:
            return float(v)
        except (TypeError, ValueError, OverflowError):
            return None

    if explicit is not None:
        return to_float(explicit)
    if "retriever_weight" in fm:
        return to_float(fm.get("retriever_weight"))
    retr = fm.get("retriever")
    if isinstance(retr, Mapping) and "weight" in retr:
        return to_float(retr.get("weight"))
    return None


def _to_rel_path(
    abs_path: Optional[Union[str, Path]],
    vault_root: Optional[Union[str, Path]],
) -> Optional[str]:
    """Express ``abs_path`` relative to ``vault_root`` when possible.

    Returns None for a None path. If no vault root is given, or the path does
    not live under it, the path is returned unchanged (as a string).
    """
    if abs_path is None:
        return None
    try:
        p = Path(abs_path)
    except TypeError:
        # Not path-like at all: fall back to its plain string form.
        return str(abs_path)
    if vault_root:
        try:
            return str(p.relative_to(Path(vault_root)))
        except (TypeError, ValueError):
            # Outside the vault (ValueError) or unusable root (TypeError):
            # keep the path as given.
            pass
    return str(p)


def _coerce_chunks(chunks_obj: Any) -> List[Any]:
    """Accept lists of dicts/objects or generators; coerce to list safely.

    None yields an empty list. A bare string is treated as one chunk rather
    than being split into characters. Anything that cannot be iterated yields
    an empty list.
    """
    if chunks_obj is None:
        return []
    if isinstance(chunks_obj, list):
        return chunks_obj
    if isinstance(chunks_obj, str):
        # Iterating a string would produce one "chunk" per character.
        return [chunks_obj]
    try:
        return list(chunks_obj)
    except Exception:
        # Deliberate best-effort: a non-iterable or failing iterable means
        # "no chunks" instead of aborting the whole payload build.
        return []


def _get_chunk_text(c: Any) -> str:
    """Return the first non-blank string among text/chunk/body/content.

    Falls back to ``str(c)`` (or "" for None) when none of those is present.
    """
    for key in ("text", "chunk", "body", "content"):
        v = _get(c, key)
        if isinstance(v, str) and v.strip():
            return v
    # last resort: string repr
    return str(c) if c is not None else ""


def _normalize_tags(raw: Any) -> List[Any]:
    """Return tags as a fresh list without mangling scalar values.

    A single string becomes a one-element list (not a list of characters), a
    non-iterable scalar is wrapped, and any other iterable is copied so the
    caller's container is never shared with the result.
    """
    if not raw:
        return []
    if isinstance(raw, str):
        stripped = raw.strip()
        return [stripped] if stripped else []
    try:
        return list(raw)
    except TypeError:
        return [raw]


def make_chunk_payloads(
    *args: Any,
    **kwargs: Any,
) -> List[Dict[str, Any]]:
    """
    Flexible signature for backward/forward compatibility.

    Expected positional args:
        args[0] -> note (ParsedNote or Mapping)
        args[1] -> chunks (Iterable)
        args[2] -> (optional) config/ignored

    Recognized kwargs:
        - note: used when no positional args are given (optional)
        - chunks: used when fewer than two positional args are given (optional)
        - vault_root: base path for relative paths (optional)
        - retriever_weight: explicit override (optional)

    Returns one payload dict per chunk, in input order, each carrying the
    note-level fields plus ``chunk_index`` and ``text``. Every payload owns
    its own ``tags`` list.

    Raises:
        TypeError: if no note is supplied, positionally or by keyword.
    """
    if not args and "note" not in kwargs:
        raise TypeError("make_chunk_payloads(note, chunks, *_) requires at least (note, chunks).")

    note = args[0] if args else kwargs["note"]
    chunks = args[1] if len(args) > 1 else kwargs.get("chunks")
    chunks_list = _coerce_chunks(chunks)

    vault_root = kwargs.get("vault_root")
    explicit_weight = kwargs.get("retriever_weight")

    # Note-level attributes win over frontmatter values of the same name.
    fm = _get_frontmatter(note)
    note_id = _get(note, "note_id") or _get(note, "id") or fm.get("id")
    title = _get(note, "title") or fm.get("title")
    ntype = _get(note, "type") or fm.get("type")
    tags = _normalize_tags(_get(note, "tags") or fm.get("tags"))
    path_val = _get(note, "path") or _get(note, "abs_path") or fm.get("path")

    base = {
        "note_id": note_id,
        "title": title,
        "type": ntype,
        "tags": tags,
        "path": _to_rel_path(path_val, vault_root),
        "retriever_weight": _resolve_retriever_weight(explicit_weight, fm),
    }

    # Copy the tag list per payload: a shallow copy of ``base`` alone would
    # leave every payload pointing at the same mutable list.
    return [
        {
            **base,
            "tags": list(tags),
            "chunk_index": idx,
            "text": _get_chunk_text(ch),
        }
        for idx, ch in enumerate(chunks_list)
    ]