# mindnet/app/core/chunk_payload.py


"""
chunk_payload.py — Mindnet payload helpers
Version: 0.5.2 (generated 2025-11-08 21:03:48)
Purpose:
  - Build CHUNK payloads list while preserving existing chunk fields (text, seq, etc.).
  - Inject into *every* chunk:
      * retriever_weight (resolved like note payload)
      * chunk_profile (resolved like note payload)
Resolution order identical to note_payload.make_note_payload.
Signature tolerant to match existing importers.
"""

from __future__ import annotations
from typing import Any, Dict, List, Optional, Union
from pathlib import Path
import os

try:
    import yaml  # type: ignore
except Exception:  # pragma: no cover
    yaml = None  # will skip YAML loading if unavailable


def _coerce_mapping(obj: Any) -> Dict[str, Any]:
    if obj is None:
        return {{}}
    if isinstance(obj, dict):
        return dict(obj)
    out: Dict[str, Any] = {{}}
    if hasattr(obj, "__dict__"):
        out.update(getattr(obj, "__dict__"))
    for k in ("id","note_id","title","type","path","source_path","frontmatter"):
        if hasattr(obj, k) and k not in out:
            out[k] = getattr(obj, k)
    return out


def _coerce_chunk_dict(obj: Any) -> Dict[str, Any]:
    if isinstance(obj, dict):
        return dict(obj)
    d = {{}}
    # common attributes for a chunk object
    for k in ("chunk_id","id","note_id","seq","start","end","text","title","type","source_path"):
        if hasattr(obj, k):
            d[k] = getattr(obj, k)
    if hasattr(obj, "__dict__"):
        for k,v in obj.__dict__.items():
            d.setdefault(k, v)
    return d


def _get_frontmatter(parsed: Dict[str, Any]) -> Dict[str, Any]:
    fm = parsed.get("frontmatter")
    return dict(fm) if isinstance(fm, dict) else {{}}


def _load_types_from_yaml(types_file: Optional[Union[str, Path]]) -> Dict[str, Any]:
    if types_file is None:
        for cand in (Path("config/types.yaml"), Path("config/types.yml"), Path("config.yaml"), Path("config.yml")):
            if cand.exists():
                types_file = cand
                break
    if types_file is None or yaml is None:
        return {{}}
    p = Path(types_file)
    if not p.exists():
        return {{}}
    try:
        data = yaml.safe_load(p.read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            return {{}}
        if "types" in data and isinstance(data["types"], dict):
            return dict(data["types"])
        return data
    except Exception:
        return {{}}


def _resolve_type_defaults(note_type: Optional[str], types: Optional[Dict[str,Any]]) -> Dict[str, Any]:
    if not note_type or not types or not isinstance(types, dict):
        return {{}}
    block = types.get(note_type)
    return dict(block) if isinstance(block, dict) else {{}}


def _to_float(val: Any, fallback: float) -> float:
    if val is None:
        return fallback
    try:
        return float(val)
    except Exception:
        return fallback


def _first_nonempty(*vals):
    for v in vals:
        if v is not None:
            if isinstance(v, str) and v.strip() == "":
                continue
            return v
    return None


def make_chunk_payloads(parsed_note: Any, chunks: List[Any], **kwargs) -> List[Dict[str, Any]]:
    """Build one payload dict per chunk, stamped with note-level settings.

    Args:
        parsed_note: The parsed note, as a dict or an attribute-bearing
            object; its ``type`` and ``frontmatter`` are consulted.
        chunks: Chunk dicts or objects. ``None`` or an empty list yields an
            empty result.
        **kwargs: Optional sources for type defaults:
            ``types`` / ``types_registry`` (an in-memory dict registry) and
            ``types_file`` (path of a YAML registry, used only when no dict
            registry is supplied).

    Returns:
        A list of new dicts, one per chunk, holding all existing chunk fields
        plus:

        * ``retriever_weight`` (always set, replacing any value the chunk
          already carried), resolved from frontmatter ``retriever_weight``,
          then the type defaults, then the environment variable
          ``MINDNET_DEFAULT_RETRIEVER_WEIGHT``, then ``1.0``. A selected
          value that cannot be converted to float also results in ``1.0``.
        * ``chunk_profile`` (only set when a value is found), resolved from
          frontmatter ``chunk_profile``, frontmatter ``profile``, the type
          defaults, then ``MINDNET_DEFAULT_CHUNK_PROFILE``.
    """
    parsed = _coerce_mapping(parsed_note)
    fm = _get_frontmatter(parsed)

    # external sources
    types_registry = kwargs.get("types") or kwargs.get("types_registry")
    if isinstance(types_registry, dict):
        types_all: Dict[str, Any] = types_registry
    else:
        # Only touch the filesystem / YAML parser when no in-memory registry
        # was supplied; the loaded result would otherwise be discarded.
        types_all = _load_types_from_yaml(kwargs.get("types_file"))

    note_type: Optional[str] = _first_nonempty(parsed.get("type"), fm.get("type"))
    type_defaults = _resolve_type_defaults(note_type, types_all)

    # _to_float already maps an unset (None) or unparseable variable to 1.0.
    env_default_val = _to_float(os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT"), 1.0)

    effective_retriever_weight = _to_float(
        _first_nonempty(
            fm.get("retriever_weight"),
            type_defaults.get("retriever_weight"),
            env_default_val,  # always a float, so this is the final candidate
        ),
        1.0,
    )

    effective_chunk_profile = _first_nonempty(
        fm.get("chunk_profile"),
        fm.get("profile"),
        type_defaults.get("chunk_profile"),
        os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE"),
    )

    out: List[Dict[str, Any]] = []
    for ch in chunks or []:
        payload = _coerce_chunk_dict(ch)  # preserve all existing chunk fields
        payload["retriever_weight"] = effective_retriever_weight
        if effective_chunk_profile is not None:
            payload["chunk_profile"] = effective_chunk_profile
        out.append(payload)
    return out