Dateien nach "app/core" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
2de786fc64
commit
2b84c62875
|
|
@ -1,9 +1,27 @@
|
|||
# app/core/chunk_payload.py
|
||||
# Line count: 214
|
||||
# Version: 1.2.0 (2025-11-08)
|
||||
# Purpose:
|
||||
# Build robust Qdrant payloads for CHUNK points.
|
||||
#
|
||||
# Highlights:
|
||||
# - Works with dict-like chunks and simple objects; supports (text, idx) tuples.
|
||||
# - Accepts legacy/extra kwargs (e.g., vault_root) without failing.
|
||||
# - Copies canonical note fields onto each chunk (note_id/title/type/tags/path).
|
||||
# - Sets 'text' and 'chunk_index' per chunk.
|
||||
# - Reliably propagates `retriever_weight` onto every chunk if provided in
|
||||
# frontmatter or explicitly.
|
||||
#
|
||||
# Usage:
|
||||
# payloads = make_chunk_payloads(note, chunks, retriever_weight=None, base_payload=None, vault_root="/path/to/vault")
|
||||
#
|
||||
# Changelog:
|
||||
# 1.2.0 (2025-11-08) Accept legacy kwargs, robust getters, propagate retriever_weight.
|
||||
# 1.1.0 (2025-11-08) Initial robust rewrite with attribute/dict support.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
|
||||
def _get(obj: Any, key: str, default: Any = None) -> Any:
|
||||
|
|
@ -12,13 +30,13 @@ def _get(obj: Any, key: str, default: Any = None) -> Any:
|
|||
if hasattr(obj, key):
|
||||
try:
|
||||
val = getattr(obj, key)
|
||||
return val if val is not None else default
|
||||
return default if val is None else val
|
||||
except Exception:
|
||||
pass
|
||||
if isinstance(obj, dict):
|
||||
if key in obj:
|
||||
val = obj.get(key, default)
|
||||
return val if val is not None else default
|
||||
return default if val is None else val
|
||||
return default
|
||||
|
||||
|
||||
|
|
@ -37,7 +55,7 @@ def _get_from_frontmatter(fm: Dict[str, Any], key: str, default: Any = None) ->
|
|||
return default
|
||||
if key in fm:
|
||||
val = fm.get(key, default)
|
||||
return val if val is not None else default
|
||||
return default if val is None else val
|
||||
return default
|
||||
|
||||
|
||||
|
|
@ -52,12 +70,12 @@ def _coerce_tags(val: Any) -> List[str]:
|
|||
return []
|
||||
|
||||
|
||||
def _resolve_retriever_weight(
|
||||
fm: Dict[str, Any],
|
||||
explicit: Optional[float],
|
||||
) -> Optional[float]:
|
||||
def _resolve_retriever_weight(fm: Dict[str, Any], explicit: Optional[float]) -> Optional[float]:
|
||||
if explicit is not None:
|
||||
return explicit
|
||||
try:
|
||||
return float(explicit)
|
||||
except Exception:
|
||||
return None
|
||||
val = _get_from_frontmatter(fm, "retriever_weight", None)
|
||||
if isinstance(val, (int, float)):
|
||||
return float(val)
|
||||
|
|
@ -69,58 +87,37 @@ def _resolve_retriever_weight(
|
|||
return None
|
||||
|
||||
|
||||
def _resolve_note_fields(note: Any) -> Dict[str, Any]:
|
||||
fm = _get_frontmatter(note)
|
||||
|
||||
note_id = _get_from_frontmatter(fm, "id", None)
|
||||
if note_id is None:
|
||||
note_id = _get(note, "note_id", None)
|
||||
if note_id is None:
|
||||
note_id = _get(note, "id", None)
|
||||
|
||||
title = _get_from_frontmatter(fm, "title", None)
|
||||
if title is None:
|
||||
title = _get(note, "title", None)
|
||||
|
||||
ntype = _get_from_frontmatter(fm, "type", None)
|
||||
if ntype is None:
|
||||
ntype = _get(note, "type", None)
|
||||
|
||||
tags = _get_from_frontmatter(fm, "tags", None)
|
||||
if tags is None:
|
||||
tags = _get(note, "tags", None)
|
||||
tags = _coerce_tags(tags)
|
||||
|
||||
def _resolve_path(note: Any, fm: Dict[str, Any], vault_root: Optional[str]) -> Optional[str]:
|
||||
path = _get_from_frontmatter(fm, "path", None)
|
||||
if path is None:
|
||||
path = _get(note, "path", None)
|
||||
path = _get(note, "path", None) or _get(note, "source", None) or _get(note, "filepath", None)
|
||||
if path is None:
|
||||
path = _get(note, "source", None)
|
||||
if path is None:
|
||||
path = _get(note, "filepath", None)
|
||||
|
||||
return {
|
||||
"note_id": note_id,
|
||||
"title": title,
|
||||
"type": ntype,
|
||||
"tags": tags,
|
||||
"path": path,
|
||||
"frontmatter": fm,
|
||||
}
|
||||
return None
|
||||
try:
|
||||
if vault_root:
|
||||
vr = Path(vault_root)
|
||||
rel = Path(path)
|
||||
try:
|
||||
return str(rel.relative_to(vr))
|
||||
except Exception:
|
||||
return str(rel)
|
||||
except Exception:
|
||||
pass
|
||||
return str(path)
|
||||
|
||||
|
||||
def _extract_chunk_text_and_index(
|
||||
chunk: Any,
|
||||
fallback_index: int,
|
||||
) -> Tuple[str, int]:
|
||||
"""
|
||||
Akzeptiert verschiedene Chunk-Formate:
|
||||
- str (reiner Text)
|
||||
- dict mit keys: text | window | body | content
|
||||
- Objekt mit Attributen: text | window | body | content
|
||||
- (text, idx) Tuple
|
||||
"""
|
||||
# Tuple (text, idx)
|
||||
def _resolve_note_fields(note: Any, vault_root: Optional[str]) -> Dict[str, Any]:
    """Collect the note-level fields that are copied onto every chunk payload.

    Each field is read from the note's frontmatter first and, when the
    frontmatter value is missing or falsy, from the note itself (attribute or
    dict key, via ``_get``).

    Parameters
    ----------
    note : Any
        Parsed note (dict or object with attributes).
    vault_root : Optional[str]
        Forwarded to ``_resolve_path`` for the 'path' field.

    Returns
    -------
    Dict[str, Any]
        Keys: 'note_id', 'title', 'type', 'tags', 'path', 'frontmatter'.
    """
    frontmatter = _get_frontmatter(note)

    def _fm_or_note(fm_key: str, *note_keys: str) -> Any:
        # Mirrors an ``a or b or c`` chain: the first truthy candidate wins,
        # otherwise the last candidate is returned as-is. Later candidates are
        # only looked up when the earlier ones were falsy.
        value = _get_from_frontmatter(frontmatter, fm_key, None)
        for note_key in note_keys:
            if value:
                break
            value = _get(note, note_key, None)
        return value

    fields: Dict[str, Any] = {
        "note_id": _fm_or_note("id", "note_id", "id"),
        "title": _fm_or_note("title", "title"),
        "type": _fm_or_note("type", "type"),
        "tags": _coerce_tags(_fm_or_note("tags", "tags")),
        "path": _resolve_path(note, frontmatter, vault_root),
        "frontmatter": frontmatter,
    }
    return fields
|
||||
|
||||
|
||||
def _extract_chunk_text_and_index(chunk: Any, fallback_index: int) -> Tuple[str, int]:
|
||||
# (text, idx) tuple
|
||||
if isinstance(chunk, tuple) and len(chunk) == 2 and isinstance(chunk[0], str):
|
||||
txt, idx = chunk
|
||||
try:
|
||||
|
|
@ -128,19 +125,12 @@ def _extract_chunk_text_and_index(
|
|||
except Exception:
|
||||
idx_int = fallback_index
|
||||
return txt, idx_int
|
||||
|
||||
# String
|
||||
# string
|
||||
if isinstance(chunk, str):
|
||||
return chunk, fallback_index
|
||||
|
||||
# Dict
|
||||
# dict
|
||||
if isinstance(chunk, dict):
|
||||
txt = (
|
||||
chunk.get("text")
|
||||
or chunk.get("window")
|
||||
or chunk.get("body")
|
||||
or chunk.get("content")
|
||||
)
|
||||
txt = chunk.get("text") or chunk.get("window") or chunk.get("body") or chunk.get("content")
|
||||
if isinstance(txt, str):
|
||||
idx = chunk.get("index")
|
||||
try:
|
||||
|
|
@ -148,8 +138,7 @@ def _extract_chunk_text_and_index(
|
|||
except Exception:
|
||||
idx_int = fallback_index
|
||||
return txt, idx_int
|
||||
|
||||
# Objekt mit Attributen
|
||||
# object with attributes
|
||||
for attr in ("text", "window", "body", "content"):
|
||||
if hasattr(chunk, attr):
|
||||
try:
|
||||
|
|
@ -157,7 +146,6 @@ def _extract_chunk_text_and_index(
|
|||
except Exception:
|
||||
txt = None
|
||||
if isinstance(txt, str):
|
||||
# Optionale "index"-Quelle
|
||||
idx = None
|
||||
if hasattr(chunk, "index"):
|
||||
try:
|
||||
|
|
@ -169,32 +157,41 @@ def _extract_chunk_text_and_index(
|
|||
except Exception:
|
||||
idx_int = fallback_index
|
||||
return txt, idx_int
|
||||
|
||||
# Wenn nichts passt -> klarer Fehler
|
||||
raise ValueError("Unsupported chunk format: cannot extract text/index")
|
||||
|
||||
|
||||
def make_chunk_payloads(
|
||||
note: Any,
|
||||
chunks: Iterable[Any],
|
||||
chunks,
|
||||
*,
|
||||
retriever_weight: Optional[float] = None,
|
||||
base_payload: Optional[Dict[str, Any]] = None,
|
||||
vault_root: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Build Qdrant payloads for chunks from a parsed note and iterable of chunks.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
note : Any
|
||||
Parsed note (dict or object with attributes).
|
||||
chunks : Iterable[Any]
|
||||
Chunks; supports str, dicts with 'text'/'window'/'body'/'content', objects with same, or (text, idx) tuples.
|
||||
retriever_weight : Optional[float]
|
||||
Optional override; if None, value is read from frontmatter.
|
||||
base_payload : Optional[Dict[str, Any]]
|
||||
Extra fields to copy onto each chunk.
|
||||
vault_root : Optional[str]
|
||||
Optional base path to compute relative 'path' if possible.
|
||||
**kwargs :
|
||||
Ignored extra options to remain compatible with callers.
|
||||
"""
|
||||
Erzeugt Qdrant-Payloads für Chunk-Punkte.
|
||||
- Kopiert Note-Metadaten (note_id/title/type/tags/path)
|
||||
- Schreibt text + chunk_index je Chunk
|
||||
- Setzt retriever_weight, wenn vorhanden/angegeben
|
||||
"""
|
||||
out: List[Dict[str, Any]] = []
|
||||
note_fields = _resolve_note_fields(note)
|
||||
note_fields = _resolve_note_fields(note, vault_root)
|
||||
fm = note_fields["frontmatter"]
|
||||
rw = _resolve_retriever_weight(fm, retriever_weight)
|
||||
|
||||
# Basisfelder, die jeder Chunk tragen soll
|
||||
common: Dict[str, Any] = {}
|
||||
if base_payload:
|
||||
if isinstance(base_payload, dict):
|
||||
common.update({k: v for k, v in base_payload.items() if v is not None})
|
||||
|
||||
if note_fields.get("note_id") is not None:
|
||||
|
|
@ -210,9 +207,10 @@ def make_chunk_payloads(
|
|||
if rw is not None:
|
||||
common["retriever_weight"] = rw
|
||||
|
||||
out: List[Dict[str, Any]] = []
|
||||
for i, ch in enumerate(chunks):
|
||||
text, idx = _extract_chunk_text_and_index(ch, i)
|
||||
payload = dict(common) # copy
|
||||
payload = dict(common)
|
||||
payload["chunk_index"] = idx
|
||||
payload["text"] = text
|
||||
out.append(payload)
|
||||
|
|
|
|||
|
|
@ -1,38 +1,46 @@
|
|||
# app/core/note_payload.py
|
||||
# Line count: 118
|
||||
# Version: 1.2.0 (2025-11-08)
|
||||
# Purpose:
|
||||
# Build robust Qdrant payloads for NOTE points.
|
||||
#
|
||||
# Highlights:
|
||||
# - Works with both dict-like inputs and ParsedNote-like objects (attribute access).
|
||||
# - Accepts legacy/extra kwargs (e.g., vault_root) without failing.
|
||||
# - Copies canonical fields: id/note_id, title, type, tags, path, text (if present).
|
||||
# - Reliably propagates `retriever_weight` into the payload if set in frontmatter
|
||||
# (frontmatter.retriever_weight or frontmatter.retriever.weight) or provided explicitly.
|
||||
#
|
||||
# Backward compatibility:
|
||||
# - Signature accepts **kwargs (e.g., vault_root) because some callers pass it.
|
||||
# - Both 'id' and 'note_id' are written for compatibility with existing queries.
|
||||
#
|
||||
# Usage:
|
||||
# payload = make_note_payload(parsed_note, retriever_weight=None, vault_root="/path/to/vault")
|
||||
#
|
||||
# Changelog:
|
||||
# 1.2.0 (2025-11-08) Accept legacy kwargs, robust getters, propagate retriever_weight.
|
||||
# 1.1.0 (2025-11-08) Initial robust rewrite with attribute/dict support.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
# Public API:
|
||||
# make_note_payload(note, *, retriever_weight: Optional[float] = None) -> Dict[str, Any]
|
||||
#
|
||||
# Anforderungen:
|
||||
# - Akzeptiert sowohl ParsedNote-ähnliche Objekte (Attribute) als auch Dicts.
|
||||
# - Liest Felder bevorzugt aus Frontmatter:
|
||||
# id/title/type/tags/path (+ retriever_weight)
|
||||
# - Fällt robust auf Note-Attribute zurück (note_id, title, type, tags, path).
|
||||
# - Setzt retriever_weight nur, wenn vorhanden/angegeben (keine Defaults).
|
||||
# - Gibt eine reine Payload (dict) zurück, die in Qdrant geschrieben werden kann.
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
def _get(obj: Any, key: str, default: Any = None) -> Any:
|
||||
"""Robuste Getter-Funktion: erst Attribute, dann Dict-Keys."""
|
||||
"""Robust getter: attribute first, then dict."""
|
||||
if obj is None:
|
||||
return default
|
||||
# Attribute
|
||||
if hasattr(obj, key):
|
||||
try:
|
||||
val = getattr(obj, key)
|
||||
return val if val is not None else default
|
||||
return default if val is None else val
|
||||
except Exception:
|
||||
pass
|
||||
# Dict
|
||||
if isinstance(obj, dict):
|
||||
if key in obj:
|
||||
val = obj.get(key, default)
|
||||
return val if val is not None else default
|
||||
return default if val is None else val
|
||||
return default
|
||||
|
||||
|
||||
|
|
@ -40,11 +48,9 @@ def _get_frontmatter(note: Any) -> Dict[str, Any]:
|
|||
fm = _get(note, "frontmatter", None)
|
||||
if isinstance(fm, dict):
|
||||
return fm
|
||||
# Manche Parser legen Meta in "meta" ab
|
||||
meta = _get(note, "meta", None)
|
||||
if isinstance(meta, dict) and isinstance(meta.get("frontmatter"), dict):
|
||||
return meta["frontmatter"]
|
||||
# Fallback: leeres Dict
|
||||
return {}
|
||||
|
||||
|
||||
|
|
@ -53,7 +59,7 @@ def _get_from_frontmatter(fm: Dict[str, Any], key: str, default: Any = None) ->
|
|||
return default
|
||||
if key in fm:
|
||||
val = fm.get(key, default)
|
||||
return val if val is not None else default
|
||||
return default if val is None else val
|
||||
return default
|
||||
|
||||
|
||||
|
|
@ -63,24 +69,23 @@ def _coerce_tags(val: Any) -> List[str]:
|
|||
if isinstance(val, list):
|
||||
return [str(x) for x in val]
|
||||
if isinstance(val, str):
|
||||
# YAML/Frontmatter kann tags als Komma-getrennte Zeichenkette liefern
|
||||
parts = [t.strip() for t in val.split(",")]
|
||||
return [p for p in parts if p]
|
||||
return []
|
||||
|
||||
|
||||
def _resolve_retriever_weight(
|
||||
fm: Dict[str, Any],
|
||||
explicit: Optional[float],
|
||||
) -> Optional[float]:
|
||||
# 1) explizit über Funktionsargument
|
||||
def _resolve_retriever_weight(fm: Dict[str, Any], explicit: Optional[float]) -> Optional[float]:
|
||||
# 1) explicit argument wins
|
||||
if explicit is not None:
|
||||
return explicit
|
||||
# 2) im Frontmatter direkt
|
||||
try:
|
||||
return float(explicit)
|
||||
except Exception:
|
||||
return None
|
||||
# 2) frontmatter.retriever_weight
|
||||
val = _get_from_frontmatter(fm, "retriever_weight", None)
|
||||
if isinstance(val, (int, float)):
|
||||
return float(val)
|
||||
# 3) verschachtelt: frontmatter.retriever.weight
|
||||
# 3) frontmatter.retriever.weight
|
||||
retr = fm.get("retriever")
|
||||
if isinstance(retr, dict):
|
||||
v = retr.get("weight")
|
||||
|
|
@ -89,59 +94,75 @@ def _resolve_retriever_weight(
|
|||
return None
|
||||
|
||||
|
||||
def _resolve_path(note: Any, fm: Dict[str, Any], vault_root: Optional[str]) -> Optional[str]:
    """Determine the note's path as a string, relative to *vault_root* if possible.

    Lookup order: frontmatter 'path', then the note's 'path', 'source' and
    'filepath' (first truthy one wins).

    Parameters
    ----------
    note : Any
        Parsed note (dict or object with attributes).
    fm : Dict[str, Any]
        The note's frontmatter.
    vault_root : Optional[str]
        Base directory; when truthy, the result is made relative to it.

    Returns
    -------
    Optional[str]
        None when no path could be found. Otherwise the path relative to
        *vault_root*, or the path itself when it is not located under
        *vault_root* or no *vault_root* was given.
    """
    location = _get_from_frontmatter(fm, "path", None)
    if location is None:
        # Equivalent to ``path or source or filepath``: stop at the first
        # truthy candidate, otherwise keep the last one looked up.
        for attr in ("path", "source", "filepath"):
            location = _get(note, attr, None)
            if location:
                break
    if location is None:
        return None

    try:
        if not vault_root:
            return str(location)
        root, target = Path(vault_root), Path(location)
        try:
            return str(target.relative_to(root))
        except Exception:
            # Not located under vault_root: return the path as-is.
            return str(target)
    except Exception:
        # Path construction failed; fall back to the plain string form.
        return str(location)
|
||||
|
||||
|
||||
def make_note_payload(
|
||||
note: Any,
|
||||
*,
|
||||
retriever_weight: Optional[float] = None,
|
||||
vault_root: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Baut eine Qdrant-Payload für eine Note.
|
||||
Erwartete Felder (wenn vorhanden): id/note_id, title, type, tags, path, text (optional)
|
||||
retriever_weight wird gesetzt, wenn vorhanden/angegeben.
|
||||
Build a Qdrant payload dict for a NOTE.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
note : Any
|
||||
Parsed note (dict or object with attributes).
|
||||
retriever_weight : Optional[float]
|
||||
Optional override; if None, value is read from frontmatter.
|
||||
vault_root : Optional[str]
|
||||
Optional base path to compute relative 'path' if possible.
|
||||
**kwargs :
|
||||
Ignored extra options to remain compatible with callers.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Dict[str, Any]
|
||||
Payload ready for Qdrant upsert.
|
||||
"""
|
||||
fm = _get_frontmatter(note)
|
||||
|
||||
# ID priorisieren: frontmatter.id > note.note_id > note.id
|
||||
note_id = _get_from_frontmatter(fm, "id", None)
|
||||
if note_id is None:
|
||||
note_id = _get(note, "note_id", None)
|
||||
if note_id is None:
|
||||
note_id = _get(note, "id", None)
|
||||
# id / note_id
|
||||
note_id = _get_from_frontmatter(fm, "id", None) or _get(note, "note_id", None) or _get(note, "id", None)
|
||||
title = _get_from_frontmatter(fm, "title", None) or _get(note, "title", None)
|
||||
ntype = _get_from_frontmatter(fm, "type", None) or _get(note, "type", None)
|
||||
tags = _coerce_tags(_get_from_frontmatter(fm, "tags", None) or _get(note, "tags", None))
|
||||
|
||||
title = _get_from_frontmatter(fm, "title", None)
|
||||
if title is None:
|
||||
title = _get(note, "title", None)
|
||||
|
||||
ntype = _get_from_frontmatter(fm, "type", None)
|
||||
if ntype is None:
|
||||
ntype = _get(note, "type", None)
|
||||
|
||||
tags = _get_from_frontmatter(fm, "tags", None)
|
||||
if tags is None:
|
||||
tags = _get(note, "tags", None)
|
||||
tags = _coerce_tags(tags)
|
||||
|
||||
path = _get_from_frontmatter(fm, "path", None)
|
||||
if path is None:
|
||||
path = _get(note, "path", None)
|
||||
# Einige Parser führen den Pfad als "source" / "filepath"
|
||||
if path is None:
|
||||
path = _get(note, "source", None)
|
||||
if path is None:
|
||||
path = _get(note, "filepath", None)
|
||||
|
||||
# Optionaler Volltext (nicht immer sinnvoll in notes-collection)
|
||||
# Optional text for notes collection (only if present; we don't force it)
|
||||
text = _get(note, "text", None)
|
||||
if text is None and isinstance(note, dict):
|
||||
# Parser variieren; manchmal "body" oder "content"
|
||||
text = note.get("body") or note.get("content")
|
||||
|
||||
# Path resolution
|
||||
path = _resolve_path(note, fm, vault_root)
|
||||
|
||||
payload: Dict[str, Any] = {}
|
||||
if note_id is not None:
|
||||
# Für Abwärtskompatibilität beide Felder schreiben
|
||||
payload["id"] = note_id
|
||||
payload["note_id"] = note_id
|
||||
|
||||
payload["id"] = note_id # keep for legacy queries
|
||||
payload["note_id"] = note_id # canonical
|
||||
if title is not None:
|
||||
payload["title"] = title
|
||||
if ntype is not None:
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user