""" chunk_payload.py — Mindnet payload helpers Version: 0.5.2 (generated 2025-11-08 21:03:48) Purpose: - Build CHUNK payloads list while preserving existing chunk fields (text, seq, etc.). - Inject into *every* chunk: * retriever_weight (resolved like note payload) * chunk_profile (resolved like note payload) Resolution order identical to note_payload.make_note_payload. Signature tolerant to match existing importers. """ from __future__ import annotations from typing import Any, Dict, List, Optional, Union from pathlib import Path import os try: import yaml # type: ignore except Exception: # pragma: no cover yaml = None # will skip YAML loading if unavailable def _coerce_mapping(obj: Any) -> Dict[str, Any]: if obj is None: return {{}} if isinstance(obj, dict): return dict(obj) out: Dict[str, Any] = {{}} if hasattr(obj, "__dict__"): out.update(getattr(obj, "__dict__")) for k in ("id","note_id","title","type","path","source_path","frontmatter"): if hasattr(obj, k) and k not in out: out[k] = getattr(obj, k) return out def _coerce_chunk_dict(obj: Any) -> Dict[str, Any]: if isinstance(obj, dict): return dict(obj) d = {{}} # common attributes for a chunk object for k in ("chunk_id","id","note_id","seq","start","end","text","title","type","source_path"): if hasattr(obj, k): d[k] = getattr(obj, k) if hasattr(obj, "__dict__"): for k,v in obj.__dict__.items(): d.setdefault(k, v) return d def _get_frontmatter(parsed: Dict[str, Any]) -> Dict[str, Any]: fm = parsed.get("frontmatter") return dict(fm) if isinstance(fm, dict) else {{}} def _load_types_from_yaml(types_file: Optional[Union[str, Path]]) -> Dict[str, Any]: if types_file is None: for cand in (Path("config/types.yaml"), Path("config/types.yml"), Path("config.yaml"), Path("config.yml")): if cand.exists(): types_file = cand break if types_file is None or yaml is None: return {{}} p = Path(types_file) if not p.exists(): return {{}} try: data = yaml.safe_load(p.read_text(encoding="utf-8")) if not isinstance(data, dict): return {{}} if "types" in data and isinstance(data["types"], dict): return dict(data["types"]) return data except Exception: return {{}} def _resolve_type_defaults(note_type: Optional[str], types: Optional[Dict[str,Any]]) -> Dict[str, Any]: if not note_type or not types or not isinstance(types, dict): return {{}} block = types.get(note_type) return dict(block) if isinstance(block, dict) else {{}} def _to_float(val: Any, fallback: float) -> float: if val is None: return fallback try: return float(val) except Exception: return fallback def _first_nonempty(*vals): for v in vals: if v is not None: if isinstance(v, str) and v.strip() == "": continue return v return None def make_chunk_payloads(parsed_note: Any, chunks: List[Any], **kwargs) -> List[Dict[str, Any]]: parsed = _coerce_mapping(parsed_note) fm = _get_frontmatter(parsed) # external sources types_registry = kwargs.get("types") or kwargs.get("types_registry") types_from_yaml = _load_types_from_yaml(kwargs.get("types_file")) types_all: Dict[str, Any] = types_registry if isinstance(types_registry, dict) else types_from_yaml note_type: Optional[str] = _first_nonempty(parsed.get("type"), fm.get("type")) type_defaults = _resolve_type_defaults(note_type, types_all) env_default = os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT") env_default_val = _to_float(env_default, 1.0) if env_default is not None else 1.0 effective_retriever_weight = _to_float( _first_nonempty( fm.get("retriever_weight"), type_defaults.get("retriever_weight"), env_default_val, 1.0, ), 1.0, ) effective_chunk_profile = _first_nonempty( fm.get("chunk_profile"), fm.get("profile"), type_defaults.get("chunk_profile"), os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE"), ) out: List[Dict[str, Any]] = [] for ch in chunks or []: payload = _coerce_chunk_dict(ch) # preserve all existing chunk fields payload["retriever_weight"] = effective_retriever_weight if effective_chunk_profile is not None: payload["chunk_profile"] = effective_chunk_profile out.append(payload) return out