mindnet/app/core/chunk_payload.py
Lars a686bdbeaf
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
Dateien nach "app/core" hochladen
2025-11-08 21:31:24 +01:00

163 lines
4.4 KiB
Python

"""
chunk_payload.py — mindnet core payload builders
Version: 1.3.1 (2025-11-08)
Purpose
-------
Build robust chunk payloads for Qdrant upserts.
`make_chunk_payloads` is intentionally flexible about its signature so that it
stays compatible with different callers.
Contract
--------
make_chunk_payloads(note, chunks, *args, **kwargs) -> List[Dict[str, Any]]
Each returned item contains at least:
- note_id (str)
- title (str)
- type (str)
- path (str or None)
- tags (List[str])
- chunk_index (int)
- text (str)
- retriever_weight (float or None) # if available
"""
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Union
def _get(obj: Any, key: str, default: Any = None) -> Any:
if obj is None:
return default
if isinstance(obj, Mapping):
return obj.get(key, default)
return getattr(obj, key, default)
def _get_frontmatter(note: Any) -> Mapping[str, Any]:
    """Return the note's ``frontmatter`` when it is a mapping, else an empty dict."""
    candidate = _get(note, "frontmatter", {})
    return candidate if isinstance(candidate, Mapping) else {}
def _resolve_retriever_weight(explicit: Any, fm: Mapping[str, Any]) -> Optional[float]:
def to_float(v: Any) -> Optional[float]:
try:
if v is None:
return None
return float(v)
except Exception:
return None
if explicit is not None:
return to_float(explicit)
if "retriever_weight" in fm:
return to_float(fm.get("retriever_weight"))
retr = fm.get("retriever")
if isinstance(retr, Mapping) and "weight" in retr:
return to_float(retr.get("weight"))
return None
def _to_rel_path(abs_path: Optional[Union[str, Path]], vault_root: Optional[Union[str, Path]]) -> Optional[str]:
if abs_path is None:
return None
try:
p = Path(abs_path)
if vault_root:
try:
rp = p.relative_to(Path(vault_root))
return str(rp)
except Exception:
return str(p)
return str(p)
except Exception:
return str(abs_path)
def _coerce_chunks(chunks_obj: Any) -> List[Any]:
"""Accept lists of dicts/objects or generators; coerce to list safely."""
if chunks_obj is None:
return []
if isinstance(chunks_obj, list):
return chunks_obj
try:
return list(chunks_obj)
except Exception:
return []
def _get_chunk_text(c: Any) -> str:
    """Extract the text of a chunk.

    The fields ``text``, ``chunk``, ``body`` and ``content`` are tried in that
    order; the first one holding a non-blank string wins. When none matches,
    the chunk's ``str()`` is used (``""`` for ``None``).
    """
    # Lazy generator: later fields are not looked up once a match is found.
    values = (_get(c, field) for field in ("text", "chunk", "body", "content"))
    match = next((val for val in values if isinstance(val, str) and val.strip()), None)
    if match is not None:
        return match
    # last resort: string repr
    return "" if c is None else str(c)
def make_chunk_payloads(
    *args: Any,
    **kwargs: Any,
) -> List[Dict[str, Any]]:
    """
    Build one payload dict per chunk of a note.

    Flexible signature for backward/forward compatibility.

    Expected positional args:
        args[0] -> note (ParsedNote or Mapping)
        args[1] -> chunks (Iterable)
        args[2:] -> (optional) config; ignored

    Recognized kwargs:
        - note: the note, used only when no positional args are given
        - chunks: the chunks, used only when fewer than two positional args
          are given
        - vault_root: base path for relative paths (optional)
        - retriever_weight: explicit override (optional)

    Returns:
        A list with one dict per chunk containing ``note_id``, ``title``,
        ``type``, ``tags``, ``path``, ``retriever_weight``, ``chunk_index``
        and ``text``. Note-level fields are read from the note first and fall
        back to its frontmatter. Each dict gets its own ``tags`` list.

    Raises:
        TypeError: if the note is passed neither positionally nor as ``note=``.
    """
    if not args and "note" not in kwargs:
        raise TypeError("make_chunk_payloads(note, chunks, *_) requires at least (note, chunks).")
    # Accept the note by keyword as well, mirroring how `chunks` is handled.
    note = args[0] if args else kwargs["note"]
    chunks = args[1] if len(args) > 1 else kwargs.get("chunks")
    chunks_list = _coerce_chunks(chunks)
    vault_root = kwargs.get("vault_root")
    explicit_weight = kwargs.get("retriever_weight")

    fm = _get_frontmatter(note)
    note_id = _get(note, "note_id") or _get(note, "id") or fm.get("id")
    title = _get(note, "title") or fm.get("title")
    ntype = _get(note, "type") or fm.get("type")

    tags = _get(note, "tags") or fm.get("tags") or []
    if isinstance(tags, str):
        # A single tag written as a plain string (e.g. `tags: foo`) must not
        # be exploded into its characters by list().
        tags = [tags]
    elif not isinstance(tags, list):
        try:
            tags = list(tags)
        except TypeError:
            # Non-iterable scalar (e.g. a number): keep it as one tag instead
            # of failing the whole note.
            tags = [tags]

    path_val = _get(note, "path") or _get(note, "abs_path") or fm.get("path")
    rweight = _resolve_retriever_weight(explicit_weight, fm)

    base = {
        "note_id": note_id,
        "title": title,
        "type": ntype,
        "tags": tags,
        "path": _to_rel_path(path_val, vault_root),
        "retriever_weight": rweight,
    }

    # Copy `tags` per payload so that mutating one payload's tags cannot leak
    # into the other payloads or back into the note object.
    return [
        {
            **base,
            "tags": list(tags),
            "chunk_index": idx,
            "text": _get_chunk_text(ch),
        }
        for idx, ch in enumerate(chunks_list)
    ]