"""
chunk_payload.py

Version: 1.4.2

Description:
    Builds the payloads for *chunks* of a note destined for the Qdrant
    "chunks" collection.

    - Defensive against both dict-like and attribute-like chunk objects.
    - Accepts extra/legacy arguments via *args / **kwargs
      (e.g., vault_root, type_defaults).
    - Ensures "retriever_weight" is present in every chunk payload, derived
      from the note.
    - Preserves common chunk metadata (idx, offsets, tokens, section info,
      etc.).
    - Tolerates legacy third positional parameter.

Public API:
    make_chunk_payloads(parsed_note, chunks, *_, retriever_weight=None,
                        vault_root=None, type_defaults=None, **__)
"""
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional

# Attributes copied from an attribute-style note object into a plain dict.
_NOTE_FIELDS = (
    "frontmatter", "fm", "meta",
    "note_id", "id", "title", "type",
    "tags", "aliases",
    "created", "updated", "date",
    "abs_path", "path", "rel_path",
)

# Sentinel that distinguishes "attribute missing" from "attribute is None".
_MISSING = object()


def _as_dict(obj: Any) -> Dict[str, Any]:
    """Return *obj* as a dict.

    A dict is returned unchanged (same object). For any other object, the
    attributes listed in ``_NOTE_FIELDS`` that exist on it are copied into a
    new dict; missing attributes are simply left out.
    """
    if isinstance(obj, dict):
        return obj
    collected: Dict[str, Any] = {}
    for name in _NOTE_FIELDS:
        value = getattr(obj, name, _MISSING)
        if value is not _MISSING:
            collected[name] = value
    return collected


def _get(obj: Any, *keys: str, default: Any = None) -> Any:
    """Return the first non-``None`` value found under any of *keys*.

    Dicts are read by key, every other object by attribute. An alias whose
    value is ``None`` is treated like a missing one so that later aliases
    (and finally *default*) still get a chance; otherwise a note carrying
    ``abs_path=None`` next to a real ``path`` would lose its path.
    """
    by_key = isinstance(obj, dict)
    for key in keys:
        value = obj.get(key) if by_key else getattr(obj, key, None)
        if value is not None:
            return value
    return default


def _as_list(val: Any) -> Optional[List[Any]]:
    """Normalize *val* to a list; ``None`` stays ``None``.

    Lists and tuples are copied, a string becomes a one-element list, any
    other iterable is materialized, and a non-iterable is wrapped.
    """
    if val is None:
        return None
    if isinstance(val, (list, tuple)):
        return list(val)
    if isinstance(val, str):
        return [val]
    try:
        return list(val)
    except Exception:  # best effort: anything that cannot be iterated is wrapped
        return [val]


def _coerce_float(val: Any, default: float) -> float:
    """Convert *val* to ``float``, falling back to *default* on failure."""
    if val is None:
        return float(default)
    try:
        return float(val)
    except Exception:  # best effort: unparsable weights use the default
        return float(default)


def _clean(d: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of *d* without the entries whose value is ``None``."""
    return {k: v for k, v in d.items() if v is not None}


def _has_get(obj: Any) -> bool:
    """Tell whether *obj* offers a callable ``get`` (dict-like access)."""
    return callable(getattr(obj, "get", None))


def _is_set_weight(val: Any) -> bool:
    """Tell whether *val* counts as an explicitly configured weight.

    Any truthy value counts, and so does a numeric zero: a weight of ``0`` is
    a deliberate setting, not a missing one. ``None``, ``False`` and empty
    values do not count.
    """
    if isinstance(val, bool):
        return val
    if isinstance(val, (int, float)):
        return True
    return bool(val)


def _type_entry(type_defaults: Any, ntype: Any) -> Any:
    """Return the dict-like defaults registered for *ntype*, or ``None``.

    ``None`` is returned when there are no defaults, when *ntype* is not
    registered, when *ntype* is unhashable (e.g. a list), or when the
    registered entry is not dict-like.
    """
    if not type_defaults:
        return None
    try:
        if ntype not in type_defaults:
            return None
        entry = type_defaults[ntype]
    except TypeError:  # unhashable note type cannot be a key
        return None
    return entry if _has_get(entry) else None


def _resolve_rel_path(
    nd: Dict[str, Any],
    abs_path: Any,
    rel_path: Any,
    vault_root: Optional[str],
) -> Any:
    """Derive the vault-relative path when only an absolute one is known.

    *rel_path* is returned untouched unless it is empty while both
    *vault_root* and *abs_path* are given. If the path cannot be made
    relative to the vault root, the note's ``path`` (or *abs_path*) is used.
    """
    if not (vault_root and abs_path and not rel_path):
        return rel_path
    try:
        resolved_root = Path(vault_root).resolve()
        return str(Path(abs_path).resolve().relative_to(resolved_root))
    except Exception:  # best effort: outside the vault or not a usable path
        return _get(nd, "path") or abs_path


def _resolve_retriever_weight(explicit: Any, fm: Any, type_entry: Any) -> float:
    """Pick the note-level retriever weight and return it as a float.

    Precedence: the caller-supplied *explicit* value, then the frontmatter
    key ``retriever_weight``, then ``retriever.weight``, then the type
    default ``retriever_weight``, and finally ``1.0``.
    """
    weight = explicit
    if weight is None:
        weight = fm.get("retriever_weight")
        if not _is_set_weight(weight):
            nested = fm.get("retriever")
            # A non-mapping "retriever" value simply carries no weight.
            weight = nested.get("weight") if _has_get(nested) else None
        if weight is None and type_entry is not None:
            weight = type_entry.get("retriever_weight")
    return _coerce_float(weight, default=1.0)


def make_chunk_payloads(
    parsed_note: Any,
    chunks: Iterable[Any],
    *_,  # legacy extra positional parameters tolerated
    retriever_weight: Optional[float] = None,
    vault_root: Optional[str] = None,
    type_defaults: Optional[Dict[str, Dict[str, Any]]] = None,
    **__,  # ignore unexpected kwargs
) -> List[Dict[str, Any]]:
    """Build one payload dict per chunk of a note.

    Args:
        parsed_note: The note, either a dict or an object exposing the
            attributes listed in ``_NOTE_FIELDS``.
        chunks: The chunks of the note, each a dict or an attribute-style
            object. Several alias names are accepted per field.
        retriever_weight: Weight applied to every chunk. When ``None`` it is
            taken from the frontmatter, then from *type_defaults*, else 1.0.
        vault_root: Root directory used to derive a relative path when the
            note only provides an absolute one.
        type_defaults: Per-note-type fallbacks for ``chunk_profile`` and
            ``retriever_weight``.

    Returns:
        A list with one payload per chunk, in input order. Fields whose value
        is ``None`` are omitted; ``retriever_weight`` is always present. For
        dict chunks, additional non-``None`` keys are carried over without
        overwriting the normalized fields.
    """
    nd = _as_dict(parsed_note)
    fm = _get(nd, "frontmatter", "fm", "meta", default={}) or {}
    if not _has_get(fm):
        # Frontmatter that is not dict-like cannot be queried; ignore it.
        fm = {}

    note_id = _get(nd, "note_id", "id") or fm.get("id")
    title = _get(nd, "title") or fm.get("title")
    ntype = _get(nd, "type") or fm.get("type") or "concept"

    # Effective path for source reference
    abs_path = _get(nd, "abs_path", "path")
    rel_path = _resolve_rel_path(nd, abs_path, _get(nd, "rel_path"), vault_root)

    type_entry = _type_entry(type_defaults, ntype)

    # Effective chunk_profile
    chunk_profile = fm.get("chunk_profile")
    if not chunk_profile and type_entry is not None:
        chunk_profile = type_entry.get("chunk_profile")

    # Resolve retriever_weight once at note level, apply to all chunks
    weight = _resolve_retriever_weight(retriever_weight, fm, type_entry)

    out: List[Dict[str, Any]] = []
    for i, ch in enumerate(chunks):
        # Basic fields with many aliases
        chunk_id = _get(ch, "chunk_id", "id")
        idx = _get(ch, "idx", "index", default=i)
        if not chunk_id and note_id is not None:
            chunk_id = f"{note_id}#ch{idx}"

        payload = _clean({
            "note_id": note_id,
            "title": title,
            "type": ntype,
            "path": rel_path or abs_path,
            "chunk_profile": chunk_profile,
            "retriever_weight": weight,
            "chunk_id": chunk_id or None,
            "chunk_index": idx,
            "text": _get(ch, "text", "content", "body", "chunk_text"),
            "char_start": _get(ch, "char_start", "start", "begin"),
            "char_end": _get(ch, "char_end", "end", "stop"),
            "token_count": _get(ch, "token_count", "tokens", "n_tokens"),
            "section": _get(ch, "section", "heading"),
            "section_path": _get(ch, "section_path", "hpath"),
        })

        # If the chunk is a mapping, preserve its extra metadata without
        # overwriting the fields we already normalized.
        if isinstance(ch, dict):
            for key, value in ch.items():
                if key not in payload and value is not None:
                    payload[key] = value

        out.append(payload)

    return out