All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
159 lines
5.6 KiB
Python
159 lines
5.6 KiB
Python
from __future__ import annotations
|
|
from typing import Any, Dict, Iterable, List, Optional, Union
|
|
|
|
# ---- Helpers ----
|
|
def _coerce_float(val: Any) -> Optional[float]:
|
|
if val is None:
|
|
return None
|
|
try:
|
|
if isinstance(val, (int, float)):
|
|
return float(val)
|
|
if isinstance(val, str):
|
|
v = val.strip()
|
|
if not v:
|
|
return None
|
|
return float(v.replace(",", "."))
|
|
except Exception:
|
|
return None
|
|
return None
|
|
|
|
def _extract_weight(frontmatter: Dict[str, Any], explicit: Optional[float]) -> Optional[float]:
    """Determine the retriever weight for a note.

    Lookup order:
      1. ``explicit`` (when not ``None``) always wins; its coerced value is
         returned even if coercion yields ``None``.
      2. The flat frontmatter key ``retriever_weight``.
      3. The nested form ``retriever: { weight: ... }``.

    Args:
        frontmatter: Parsed note frontmatter. Anything that is not a dict is
            treated as "no frontmatter".
        explicit: Weight passed directly by the caller, if any.

    Returns:
        The weight as a float, or ``None`` when no usable value is found.
    """
    if explicit is not None:
        return _coerce_float(explicit)

    # Same guard as the other helpers: a non-dict frontmatter (None, str,
    # list, ...) must not reach the ``.get`` calls below.
    if not isinstance(frontmatter, dict):
        return None

    flat = _coerce_float(frontmatter.get("retriever_weight"))
    if flat is not None:
        return flat

    # A missing or unusable flat key falls through to the nested style, so an
    # empty ``retriever_weight:`` entry does not mask ``retriever.weight``.
    nested = frontmatter.get("retriever")
    if isinstance(nested, dict):
        return _coerce_float(nested.get("weight"))
    return None
|
|
|
|
def _ensure_list(x: Any) -> List[Any]:
|
|
if x is None:
|
|
return []
|
|
if isinstance(x, list):
|
|
return x
|
|
return [x]
|
|
|
|
def _resolve_note_id(frontmatter: Dict[str, Any], kw_note_id: Optional[str]) -> Optional[str]:
|
|
if kw_note_id:
|
|
return kw_note_id
|
|
if not isinstance(frontmatter, dict):
|
|
return None
|
|
return frontmatter.get("id") or frontmatter.get("note_id")
|
|
|
|
def _base_fields(frontmatter: Dict[str, Any], note_id: Optional[str], path: str) -> Dict[str, Any]:
|
|
title = None
|
|
typ = None
|
|
tags = None
|
|
if isinstance(frontmatter, dict):
|
|
title = frontmatter.get("title")
|
|
typ = frontmatter.get("type") or frontmatter.get("note_type")
|
|
# tags can be list[str] or comma separated string
|
|
tags = frontmatter.get("tags")
|
|
if isinstance(tags, str):
|
|
tags = [t.strip() for t in tags.split(",") if t.strip()]
|
|
return {
|
|
"note_id": note_id,
|
|
"title": title,
|
|
"type": typ,
|
|
"tags": tags,
|
|
"path": path or None,
|
|
}
|
|
|
|
# ---- Public API ----
|
|
def _chunk_payload_from_item(item: Any, idx: int, base: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize one chunk item into a payload dict (without ``retriever_weight``)."""
    if isinstance(item, str):
        # Plain string: the text doubles as its own window (no context).
        return {**base, "chunk_index": idx, "text": item, "window": item}

    if not isinstance(item, dict):
        # Unknown item type: minimal payload carrying only the shared fields.
        return {**base, "chunk_index": idx}

    if "text" in item or "window" in item or "note_id" in item:
        # Already payload-like: keep every field, fill in what is missing.
        pl = dict(item)  # shallow copy so the caller's dict is not modified
        fallback_index = item.get("index", idx)
    elif isinstance(item.get("payload"), dict):
        # Wrapper dict carrying the real payload under "payload".
        pl = dict(item["payload"])
        fallback_index = pl.get("index", idx)
    else:
        # Text stored under an alternative key. "text"/"window" cannot be
        # present here (the first branch would have matched), so the window
        # is always the text itself.
        text_val = item.get("body") or item.get("content") or ""
        return {
            **base,
            "chunk_index": item.get("chunk_index", item.get("index", idx)),
            "text": text_val,
            "window": text_val,
        }

    for key, value in base.items():
        pl.setdefault(key, value)
    pl.setdefault("chunk_index", fallback_index)
    return pl


def make_chunk_payloads(
    frontmatter: Dict[str, Any],
    *args,
    note_id: Optional[str] = None,
    chunks: Optional[Iterable[Any]] = None,
    path: str = "",
    chunk_profile: Optional[str] = None,
    retriever_weight: Optional[float] = None,
    **kwargs,
) -> List[Dict[str, Any]]:
    """Build chunk payload dictionaries for Qdrant.

    The function is intentionally permissive to stay compatible with older
    callers. Each item in ``chunks`` is handled according to its shape:

    - dict containing ``text``, ``window`` or ``note_id``: treated as an
      existing payload; it is copied and missing shared fields are added.
    - dict with a nested dict under ``payload``: that nested dict is used as
      the payload and completed the same way.
    - str: a minimal payload with the string as both ``text`` and ``window``.
    - any other dict: text is taken from ``body`` or ``content`` (empty
      string if neither is set) and also used as the window.
    - anything else: a payload with only the shared fields and the index.

    Args:
        frontmatter: Parsed note frontmatter; source of the shared fields and
            of the fallback note id / retriever weight.
        *args: Accepted for call compatibility; not read.
        note_id: Explicit note id; takes precedence over the frontmatter.
        chunks: Chunk items. When ``None``, the keyword aliases ``payloads``,
            ``pls``, ``items`` and ``chunk_items`` are tried in that order.
        path: Note path stored in each payload (empty string becomes ``None``).
        chunk_profile: Accepted for call compatibility; not read.
        retriever_weight: Explicit weight; takes precedence over frontmatter.
        **kwargs: Only the chunk aliases listed above are read.

    Returns:
        One payload dict per chunk item, in input order. ``retriever_weight``
        is set on every payload whenever a weight could be determined,
        overwriting any value the item already carried.
    """
    # Backward-compat for callers that pass the chunks under another keyword:
    # take the first alias with a truthy value.
    if chunks is None:
        for alias in ("payloads", "pls", "items", "chunk_items"):
            chunks = kwargs.get(alias)
            if chunks:
                break

    note_id_resolved = _resolve_note_id(frontmatter, note_id)
    weight = _extract_weight(frontmatter, retriever_weight)
    base = _base_fields(frontmatter, note_id_resolved, path)
    shared_tags = base.get("tags")

    out: List[Dict[str, Any]] = []
    for idx, item in enumerate(_ensure_list(chunks)):
        item_base = dict(base)
        if isinstance(shared_tags, list):
            # Give every payload its own tags list so that changing one
            # payload cannot affect the others or the frontmatter.
            item_base["tags"] = list(shared_tags)

        pl = _chunk_payload_from_item(item, idx, item_base)
        if weight is not None:
            pl["retriever_weight"] = weight
        out.append(pl)

    return out
|