All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
163 lines
4.4 KiB
Python
163 lines
4.4 KiB
Python
"""
|
|
chunk_payload.py — mindnet core payload builders
|
|
Version: 1.3.1 (2025-11-08)
|
|
|
|
Purpose
|
|
-------
|
|
Build robust chunk payloads for Qdrant upserts.
|
|
This function is intentionally flexible about its signature to remain
|
|
compatible with different callers.
|
|
|
|
Contract
|
|
--------
|
|
make_chunk_payloads(note, chunks, *args, **kwargs) -> List[Dict[str, Any]]
|
|
|
|
Each returned item contains at least:
|
|
- note_id (str)
|
|
- title (str)
|
|
- type (str)
|
|
- path (str or None)
|
|
- tags (List[str])
|
|
- chunk_index (int)
|
|
- text (str)
|
|
- retriever_weight (float or None) # if available
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Union
|
|
|
|
|
|
def _get(obj: Any, key: str, default: Any = None) -> Any:
    """Read ``key`` from a mapping or from an object's attributes.

    Args:
        obj: A ``Mapping``, an arbitrary object, or ``None``.
        key: Mapping key or attribute name to look up.
        default: Value returned when ``obj`` is ``None`` or the key/attribute
            is missing.

    Returns:
        ``obj.get(key, default)`` for mappings, ``getattr(obj, key, default)``
        for any other non-``None`` object, and ``default`` for ``None``.
    """
    if isinstance(obj, Mapping):
        return obj.get(key, default)
    if obj is not None:
        return getattr(obj, key, default)
    # ``None`` is never probed with getattr, so it always yields the default.
    return default
|
|
|
|
|
|
def _get_frontmatter(note: Any) -> Mapping[str, Any]:
    """Return the note's ``frontmatter`` if it is a mapping, else an empty dict.

    Args:
        note: Mapping or object that may expose a ``frontmatter`` entry.

    Returns:
        The ``frontmatter`` value itself when it is a ``Mapping``; otherwise a
        new empty dict (also when ``note`` is ``None`` or has no frontmatter).
    """
    candidate = _get(note, "frontmatter", {})
    return candidate if isinstance(candidate, Mapping) else {}
|
|
|
|
|
|
def _resolve_retriever_weight(explicit: Any, fm: Mapping[str, Any]) -> Optional[float]:
    """Resolve the retriever weight from an explicit override or frontmatter.

    Precedence (first match wins, even if its value cannot be converted):
      1. ``explicit`` when it is not ``None``
      2. ``fm["retriever_weight"]`` when that key is present
      3. ``fm["retriever"]["weight"]`` when ``retriever`` is a mapping with
         a ``weight`` key

    Args:
        explicit: Caller-supplied override; ``None`` means "not provided".
        fm: Frontmatter mapping of the note.

    Returns:
        The weight as a finite ``float``, or ``None`` when no source is
        present or the selected value is not convertible to a finite number.
    """
    import math  # local import so the block does not depend on file-level imports

    def to_float(v: Any) -> Optional[float]:
        """Best-effort conversion; unusable values collapse to ``None``."""
        if v is None:
            return None
        try:
            number = float(v)
        except Exception:
            # Deliberately best-effort: any conversion failure means "no weight".
            return None
        # NaN/Infinity are not valid JSON numbers, so they must not reach
        # the payload; treat them like any other unusable value.
        return number if math.isfinite(number) else None

    if explicit is not None:
        return to_float(explicit)

    if "retriever_weight" in fm:
        return to_float(fm.get("retriever_weight"))

    retr = fm.get("retriever")
    if isinstance(retr, Mapping) and "weight" in retr:
        return to_float(retr.get("weight"))

    return None
|
|
|
|
|
|
def _to_rel_path(abs_path: Optional[Union[str, Path]], vault_root: Optional[Union[str, Path]]) -> Optional[str]:
    """Express ``abs_path`` relative to ``vault_root`` where possible.

    Args:
        abs_path: Path of the note, or ``None``.
        vault_root: Base directory to strip; falsy values disable stripping.

    Returns:
        ``None`` when ``abs_path`` is ``None``. Otherwise a string: the path
        relative to ``vault_root`` if that can be computed, else the path
        unchanged. If ``abs_path`` cannot be turned into a ``Path`` at all,
        ``str(abs_path)`` is returned.
    """
    if abs_path is None:
        return None
    try:
        candidate = Path(abs_path)
        if not vault_root:
            return str(candidate)
        try:
            relative = candidate.relative_to(Path(vault_root))
        except Exception:
            # Not located under the vault root (or root unusable): keep as-is.
            return str(candidate)
        return str(relative)
    except Exception:
        # Last resort for inputs that pathlib rejects.
        return str(abs_path)
|
|
|
|
|
|
def _coerce_chunks(chunks_obj: Any) -> List[Any]:
    """Coerce the ``chunks`` argument into a list of chunk items.

    Args:
        chunks_obj: ``None``, a list, a single chunk given as a string/bytes
            value or a mapping, or any other iterable (tuple, generator, ...).

    Returns:
        * ``[]`` for ``None`` or for objects that cannot be iterated.
        * The same list object when a list is passed (no copy).
        * A one-element list for ``str``/``bytes``/``bytearray``/``Mapping``
          inputs: iterating those would yield characters, byte values or
          keys rather than chunks.
        * ``list(chunks_obj)`` for every other iterable.
    """
    if chunks_obj is None:
        return []
    if isinstance(chunks_obj, list):
        return chunks_obj
    # A bare string or a single chunk dict is one chunk, not a sequence of
    # characters/keys.
    if isinstance(chunks_obj, (str, bytes, bytearray, Mapping)):
        return [chunks_obj]
    try:
        return list(chunks_obj)
    except Exception:
        # Best-effort by design: non-iterables (and iterables that fail while
        # being consumed) produce no chunks instead of aborting the caller.
        return []
|
|
|
|
|
|
def _get_chunk_text(c: Any) -> str:
    """Extract the text of a chunk given as a mapping or an object.

    The fields ``text``, ``chunk``, ``body`` and ``content`` are probed in
    that order; the first one holding a non-blank string is returned as-is.

    Args:
        c: Chunk mapping/object, or ``None``.

    Returns:
        The first non-blank string field, otherwise ``str(c)``; an empty
        string when ``c`` is ``None``.
    """
    candidates = (_get(c, field) for field in ("text", "chunk", "body", "content"))
    for value in candidates:
        if isinstance(value, str) and value.strip():
            return value
    # No usable text field: fall back to the chunk's string representation.
    if c is None:
        return ""
    return str(c)
|
|
|
|
|
|
def make_chunk_payloads(
    *args: Any,
    **kwargs: Any,
) -> List[Dict[str, Any]]:
    """Build one payload dict per chunk of a note.

    Flexible signature for backward/forward compatibility.

    Expected positional args:
        args[0] -> note (ParsedNote or Mapping)
        args[1] -> chunks (Iterable)
        args[2] -> (optional) config/ignored

    Recognized kwargs:
        - note: the note, used only when no positional args are given
        - chunks: the chunks, used only when fewer than two positional
          args are given
        - vault_root: base path for relative paths (optional)
        - retriever_weight: explicit override (optional)

    Note-level values are read from the note first and from its frontmatter
    second. ``tags`` is always normalized to a list: a string is split on
    commas, any other iterable is copied, and a non-iterable scalar becomes a
    one-element list.

    Returns:
        A list with one dict per chunk containing ``note_id``, ``title``,
        ``type``, ``tags``, ``path``, ``retriever_weight``, ``chunk_index``
        and ``text``. Each dict owns its own ``tags`` list.

    Raises:
        TypeError: If no note is supplied, positionally or as ``note=``.
    """
    if args:
        note = args[0]
    elif "note" in kwargs:
        # ``chunks`` was already accepted by keyword; accept ``note`` too.
        note = kwargs["note"]
    else:
        raise TypeError("make_chunk_payloads(note, chunks, *_) requires at least (note, chunks).")

    chunks = args[1] if len(args) > 1 else kwargs.get("chunks")
    chunks_list = _coerce_chunks(chunks)

    vault_root = kwargs.get("vault_root")
    explicit_weight = kwargs.get("retriever_weight")

    fm = _get_frontmatter(note)

    note_id = _get(note, "note_id") or _get(note, "id") or fm.get("id")
    title = _get(note, "title") or fm.get("title")
    ntype = _get(note, "type") or fm.get("type")

    raw_tags = _get(note, "tags") or fm.get("tags") or []
    if isinstance(raw_tags, str):
        # ``list("foo")`` would yield single characters; treat the string as
        # a comma-separated tag list instead.
        tags = [part.strip() for part in raw_tags.split(",") if part.strip()]
    else:
        try:
            # Copy so the payloads never alias the note's own tag list.
            tags = list(raw_tags)
        except TypeError:
            # Non-iterable scalar (e.g. a number): keep it as a single tag.
            tags = [raw_tags]

    path_val = _get(note, "path") or _get(note, "abs_path") or fm.get("path")
    rweight = _resolve_retriever_weight(explicit_weight, fm)

    base = {
        "note_id": note_id,
        "title": title,
        "type": ntype,
        "tags": tags,
        "path": _to_rel_path(path_val, vault_root),
        "retriever_weight": rweight,
    }

    # Every payload gets its own ``tags`` list so that mutating one payload
    # cannot affect the others.
    return [
        {
            **base,
            "tags": list(tags),
            "chunk_index": idx,
            "text": _get_chunk_text(ch),
        }
        for idx, ch in enumerate(chunks_list)
    ]
|