Dateien nach "app/core" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s

This commit is contained in:
Lars 2025-11-08 21:31:24 +01:00
parent 2b84c62875
commit a686bdbeaf
2 changed files with 217 additions and 315 deletions

View File

@ -1,218 +1,162 @@
# app/core/chunk_payload.py """
# Version: 1.2.0 (2025-11-08) chunk_payload.py mindnet core payload builders
# Purpose: Version: 1.3.1 (2025-11-08)
# Build robust Qdrant payloads for CHUNK points.
#
# Highlights:
# - Works with dict-like chunks and simple objects; supports (text, idx) tuples.
# - Accepts legacy/extra kwargs (e.g., vault_root) without failing.
# - Copies canonical note fields onto each chunk (note_id/title/type/tags/path).
# - Sets 'text' and 'chunk_index' per chunk.
# - Reliably propagates `retriever_weight` onto every chunk if provided in
# frontmatter or explicitly.
#
# Usage:
# payloads = make_chunk_payloads(note, chunks, retriever_weight=None, base_payload=None, vault_root="/path/to/vault")
#
# Changelog:
# 1.2.0 (2025-11-08) Accept legacy kwargs, robust getters, propagate retriever_weight.
# 1.1.0 (2025-11-08) Initial robust rewrite with attribute/dict support.
Purpose
-------
Build robust chunk payloads for Qdrant upserts.
This function is intentionally flexible about its signature to remain
compatible with different callers.
Contract
--------
make_chunk_payloads(note, chunks, *args, **kwargs) -> List[Dict[str, Any]]
Each returned item contains at least:
- note_id (str)
- title (str)
- type (str)
- path (str or None)
- tags (List[str])
- chunk_index (int)
- text (str)
- retriever_weight (float or None) # if available
"""
from __future__ import annotations from __future__ import annotations
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Union
def _get(obj: Any, key: str, default: Any = None) -> Any:
    """Read ``key`` from a mapping or from an attribute-style object.

    Args:
        obj: A mapping, an arbitrary object, or ``None``.
        key: Mapping key or attribute name to look up.
        default: Value returned when ``obj`` is ``None`` or the key/attribute
            is absent.

    Returns:
        ``obj.get(key, default)`` for mappings, otherwise
        ``getattr(obj, key, default)``. A value stored as ``None`` is returned
        unchanged, which is why callers chain lookups with ``or``.
    """
    if obj is None:
        return default
    if isinstance(obj, Mapping):
        return obj.get(key, default)
    return getattr(obj, key, default)
def _get_frontmatter(note: Any) -> Mapping[str, Any]:
    """Return the note's frontmatter mapping, or an empty dict if there is none.

    Lookup order:
      1. ``note.frontmatter`` / ``note["frontmatter"]``
      2. ``note.meta["frontmatter"]`` -- the nested layout that version 1.2.0
         accepted; without it such notes lose every frontmatter-derived field.

    Args:
        note: Parsed note, either a mapping or an object with attributes.

    Returns:
        The first candidate that is a mapping, otherwise ``{}``.
    """
    fm = _get(note, "frontmatter")
    if isinstance(fm, Mapping):
        return fm
    meta = _get(note, "meta")
    if isinstance(meta, Mapping):
        nested = meta.get("frontmatter")
        if isinstance(nested, Mapping):
            return nested
    return {}
def _resolve_retriever_weight(explicit: Any, fm: Mapping[str, Any]) -> Optional[float]:
    """Resolve the retriever weight for a note.

    Candidates are tried in priority order and the first one that converts to
    a finite float wins:
      1. ``explicit`` (the caller's ``retriever_weight`` override)
      2. ``fm["retriever_weight"]``
      3. ``fm["retriever"]["weight"]``

    A candidate that is ``None``, unparsable, NaN or infinite is skipped
    instead of ending the search, so ``retriever_weight: null`` next to a valid
    ``retriever.weight`` still yields a weight.

    Args:
        explicit: Explicit override; any value accepted by ``float()``.
        fm: Frontmatter mapping. Anything that is not a mapping is treated as
            empty.

    Returns:
        The resolved weight, or ``None`` when no candidate is usable.
    """
    import math  # function-scope import: keeps this block independent of the file's import header

    def to_float(value: Any) -> Optional[float]:
        if value is None:
            return None
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return None
        # NaN/inf cannot be used as a ranking weight, so treat them as unusable.
        return number if math.isfinite(number) else None

    frontmatter = fm if isinstance(fm, Mapping) else {}
    nested = frontmatter.get("retriever")
    candidates = (
        explicit,
        frontmatter.get("retriever_weight"),
        nested.get("weight") if isinstance(nested, Mapping) else None,
    )
    for candidate in candidates:
        weight = to_float(candidate)
        if weight is not None:
            return weight
    return None
def _to_rel_path(abs_path: Optional[Union[str, Path]], vault_root: Optional[Union[str, Path]]) -> Optional[str]:
    """Express ``abs_path`` relative to ``vault_root`` when that is possible.

    Args:
        abs_path: Path of the note; ``None`` means the path is unknown.
        vault_root: Optional base directory. Ignored when falsy.

    Returns:
        ``None`` if ``abs_path`` is ``None``; the path relative to
        ``vault_root`` if it lies underneath it; otherwise the path unchanged
        (as ``str``). A value that ``Path()`` rejects is returned as
        ``str(abs_path)``.
    """
    if abs_path is None:
        return None
    try:
        path = Path(abs_path)
    except TypeError:
        # Not path-like (e.g. a number): fall back to its string form.
        return str(abs_path)
    if vault_root:
        try:
            return str(path.relative_to(Path(vault_root)))
        except (TypeError, ValueError):
            # ValueError: path is not below vault_root; TypeError: bad vault_root.
            pass
    return str(path)
def _coerce_chunks(chunks_obj: Any) -> List[Any]:
    """Coerce the ``chunks`` argument to a list.

    Args:
        chunks_obj: ``None``, a list, any other iterable (tuple, generator,
            ...), or a single chunk given as ``str``/``bytes``/mapping.

    Returns:
        ``[]`` for ``None`` or a non-iterable value; the same list object for a
        list; a one-element list for a single ``str``/``bytes``/mapping (these
        are iterable, but iterating them would yield characters or keys rather
        than chunks); otherwise ``list(chunks_obj)``.
    """
    if chunks_obj is None:
        return []
    if isinstance(chunks_obj, list):
        return chunks_obj
    if isinstance(chunks_obj, (str, bytes, Mapping)):
        return [chunks_obj]
    try:
        return list(chunks_obj)
    except TypeError:
        # Not iterable at all; errors raised *inside* a generator still propagate.
        return []
def _get_chunk_text(c: Any) -> str:
    """Extract the text of a single chunk.

    Supported shapes, checked in this order:
      * ``None`` -> ``""``
      * ``str`` -> returned unchanged
      * mapping/object exposing a non-blank string under ``text``, ``window``,
        ``chunk``, ``body`` or ``content``
      * ``(text, idx)`` tuple whose first element is a string
      * mapping without any usable text -> ``""`` (its repr is not content)
      * anything else -> ``str(c)`` as a last resort

    Args:
        c: The chunk in any of the shapes listed above.

    Returns:
        The chunk text.
    """
    if c is None:
        return ""
    if isinstance(c, str):
        return c
    for key in ("text", "window", "chunk", "body", "content"):
        value = _get(c, key)
        if isinstance(value, str) and value.strip():
            return value
    if isinstance(c, tuple) and len(c) == 2 and isinstance(c[0], str):
        return c[0]
    if isinstance(c, Mapping):
        return ""
    # last resort: string representation of an unknown object
    return str(c)
def _coerce_tags(val: Any) -> List[str]:
    """Normalize a ``tags`` value to a list of strings."""
    if val is None:
        return []
    if isinstance(val, str):
        # A scalar such as "a, b" is a comma-separated list, not a sequence of characters.
        return [part for part in (raw.strip() for raw in val.split(",")) if part]
    try:
        return [str(tag) for tag in val if tag is not None]
    except TypeError:
        # Not iterable (e.g. a number): no usable tags.
        return []


def make_chunk_payloads(
    *args: Any,
    **kwargs: Any,
) -> List[Dict[str, Any]]:
    """Build one Qdrant payload dict per chunk of a note.

    The signature is deliberately loose for backward/forward compatibility.

    Positional args:
        args[0] -> note (mapping or object with attributes)
        args[1] -> chunks (iterable)
        args[2:] -> ignored

    Recognized kwargs:
        note: the note, when not given positionally
        chunks: the chunks, when not given positionally
        vault_root: base path used to make ``path`` relative (optional)
        retriever_weight: explicit override (optional)
    Any other keyword argument is ignored.

    Returns:
        A list with one dict per chunk containing ``note_id``, ``title``,
        ``type``, ``tags``, ``path``, ``retriever_weight``, ``chunk_index``
        (position of the chunk in ``chunks``) and ``text``.

    Raises:
        TypeError: If no note is supplied, positionally or as ``note=``.
    """
    if args:
        note = args[0]
    elif "note" in kwargs:
        note = kwargs["note"]
    else:
        raise TypeError("make_chunk_payloads(note, chunks, *_) requires at least (note, chunks).")

    chunks = args[1] if len(args) > 1 else kwargs.get("chunks")
    chunks_list = _coerce_chunks(chunks)

    fm = _get_frontmatter(note)
    path_val = _get(note, "path") or _get(note, "abs_path") or fm.get("path")

    # Fields shared by every chunk of this note; values on the note itself win
    # over frontmatter values.
    base: Dict[str, Any] = {
        "note_id": _get(note, "note_id") or _get(note, "id") or fm.get("id"),
        "title": _get(note, "title") or fm.get("title"),
        "type": _get(note, "type") or fm.get("type"),
        "tags": _coerce_tags(_get(note, "tags") or fm.get("tags")),
        "path": _to_rel_path(path_val, kwargs.get("vault_root")),
        "retriever_weight": _resolve_retriever_weight(kwargs.get("retriever_weight"), fm),
    }

    return [
        {**base, "chunk_index": idx, "text": _get_chunk_text(ch)}
        for idx, ch in enumerate(chunks_list)
    ]

View File

@ -1,181 +1,139 @@
# app/core/note_payload.py """
# Version: 1.2.0 (2025-11-08) note_payload.py mindnet core payload builders
# Purpose: Version: 1.3.1 (2025-11-08)
# Build robust Qdrant payloads for NOTE points.
#
# Highlights:
# - Works with both dict-like inputs and ParsedNote-like objects (attribute access).
# - Accepts legacy/extra kwargs (e.g., vault_root) without failing.
# - Copies canonical fields: id/note_id, title, type, tags, path, text (if present).
# - Reliably propagates `retriever_weight` into the payload if set in frontmatter
# (frontmatter.retriever_weight or frontmatter.retriever.weight) or provided explicitly.
#
# Backward compatibility:
# - Signature accepts **kwargs (e.g., vault_root) because some callers pass it.
# - Both 'id' and 'note_id' are written for compatibility with existing queries.
#
# Usage:
# payload = make_note_payload(parsed_note, retriever_weight=None, vault_root="/path/to/vault")
#
# Changelog:
# 1.2.0 (2025-11-08) Accept legacy kwargs, robust getters, propagate retriever_weight.
# 1.1.0 (2025-11-08) Initial robust rewrite with attribute/dict support.
Purpose
-------
Build a robust, forward-compatible note payload for Qdrant upserts.
This module is intentionally defensive:
- Accepts both dict-like "parsed note" objects and dataclass/objects with attributes.
- Tolerates extra kwargs from different callers (e.g., `vault_root`, `prefix`, etc.).
- Ensures `retriever_weight` is resolved and present in the payload if available.
Contract
--------
make_note_payload(note, **kwargs) -> Dict[str, Any]
Expected minimal fields in returned payload:
- note_id (str)
- title (str)
- type (str)
- path (str or None) # relative to vault_root when provided
- tags (List[str])
- retriever_weight (float or None) # if available
"""
from __future__ import annotations from __future__ import annotations
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
def _get(obj: Any, key: str, default: Any = None) -> Any:
    """Try to read ``key`` from a mapping or an attribute; else ``default``.

    Args:
        obj: A mapping, an arbitrary object, or ``None``.
        key: Mapping key or attribute name to look up.
        default: Value returned when ``obj`` is ``None`` or the key/attribute
            is absent.

    Returns:
        ``obj.get(key, default)`` for mappings, otherwise
        ``getattr(obj, key, default)``. A value stored as ``None`` is returned
        unchanged, which is why callers chain lookups with ``or``.
    """
    if obj is None:
        return default
    if isinstance(obj, Mapping):
        return obj.get(key, default)
    # attribute access
    return getattr(obj, key, default)
def _get_frontmatter(note: Any) -> Mapping[str, Any]:
    """Return the note's frontmatter mapping, or an empty dict if there is none.

    Lookup order:
      1. ``note.frontmatter`` / ``note["frontmatter"]``
      2. ``note.meta["frontmatter"]`` -- the nested layout that version 1.2.0
         accepted; without it such notes lose every frontmatter-derived field.

    Args:
        note: Parsed note, either a mapping or an object with attributes.

    Returns:
        The first candidate that is a mapping, otherwise ``{}``.
    """
    fm = _get(note, "frontmatter")
    if isinstance(fm, Mapping):
        return fm
    meta = _get(note, "meta")
    if isinstance(meta, Mapping):
        nested = meta.get("frontmatter")
        if isinstance(nested, Mapping):
            return nested
    return {}  # be safe
def _resolve_retriever_weight(explicit: Any, fm: Mapping[str, Any]) -> Optional[float]:
    """Resolve the retriever weight for a note.

    Priority (the first candidate that converts to a finite float wins):
      1) explicit kwarg ``retriever_weight``
      2) ``frontmatter['retriever_weight']``
      3) ``frontmatter['retriever']['weight']``

    A candidate that is ``None``, unparsable, NaN or infinite is skipped
    instead of ending the search, so ``retriever_weight: null`` next to a valid
    ``retriever.weight`` still yields a weight.

    Args:
        explicit: Explicit override; any value accepted by ``float()``.
        fm: Frontmatter mapping. Anything that is not a mapping is treated as
            empty.

    Returns:
        The resolved weight, or ``None`` when no candidate is usable.
    """
    import math  # function-scope import: keeps this block independent of the file's import header

    def to_float(value: Any) -> Optional[float]:
        if value is None:
            return None
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return None
        # NaN/inf cannot be used as a ranking weight, so treat them as unusable.
        return number if math.isfinite(number) else None

    frontmatter = fm if isinstance(fm, Mapping) else {}
    nested = frontmatter.get("retriever")
    candidates = (
        explicit,
        frontmatter.get("retriever_weight"),
        nested.get("weight") if isinstance(nested, Mapping) else None,
    )
    for candidate in candidates:
        weight = to_float(candidate)
        if weight is not None:
            return weight
    return None
def _to_rel_path(abs_path: Optional[Union[str, Path]], vault_root: Optional[Union[str, Path]]) -> Optional[str]:
    """Express ``abs_path`` relative to ``vault_root`` when that is possible.

    Args:
        abs_path: Path of the note; ``None`` means the path is unknown.
        vault_root: Optional base directory. Ignored when falsy.

    Returns:
        ``None`` if ``abs_path`` is ``None``; the path relative to
        ``vault_root`` if it lies underneath it; otherwise the path unchanged
        (as ``str``). A value that ``Path()`` rejects is returned as
        ``str(abs_path)``.
    """
    if abs_path is None:
        return None
    try:
        path = Path(abs_path)
    except TypeError:
        # Not path-like (e.g. a number): fall back to its string form.
        return str(abs_path)
    if vault_root:
        try:
            return str(path.relative_to(Path(vault_root)))
        except (TypeError, ValueError):
            # ValueError: path is not below vault_root; TypeError: bad vault_root.
            pass
    return str(path)
def _coerce_tags(val: Any) -> List[str]:
    """Normalize a ``tags`` value to a list of strings."""
    if val is None:
        return []
    if isinstance(val, str):
        # A scalar such as "a, b" is a comma-separated list, not a sequence of characters.
        return [part for part in (raw.strip() for raw in val.split(",")) if part]
    try:
        return [str(tag) for tag in val if tag is not None]
    except TypeError:
        # Not iterable (e.g. a number): no usable tags.
        return []


def make_note_payload(
    note: Any,
    *args,  # tolerate older/other callers
    **kwargs: Any,
) -> Dict[str, Any]:
    """Build a normalized note payload for Qdrant.

    Unknown kwargs and extra positional arguments are ignored to keep the
    function forward-compatible.

    Args:
        note: Parsed note, either a mapping or an object with attributes.

    Recognized kwargs:
        vault_root: base path to make ``path`` relative (optional)
        retriever_weight: explicit override (optional)

    Returns:
        A dict with ``note_id``, ``title``, ``type``, ``tags``, ``path`` and
        ``retriever_weight``, plus ``status``/``created``/``updated`` copied
        from the frontmatter when they are set there.
    """
    fm = _get_frontmatter(note)
    path_val = _get(note, "path") or _get(note, "abs_path") or fm.get("path")

    # Values on the note itself win over frontmatter values.
    payload: Dict[str, Any] = {
        "note_id": _get(note, "note_id") or _get(note, "id") or fm.get("id"),
        "title": _get(note, "title") or fm.get("title"),
        "type": _get(note, "type") or fm.get("type"),
        "tags": _coerce_tags(_get(note, "tags") or fm.get("tags")),
        "path": _to_rel_path(path_val, kwargs.get("vault_root")),
        "retriever_weight": _resolve_retriever_weight(kwargs.get("retriever_weight"), fm),
    }

    # Also surface selected frontmatter fields; none of these names collides
    # with the keys set above.
    for key in ("status", "created", "updated"):
        value = fm.get(key)
        if value is not None:
            payload[key] = value

    return payload