mindnet/app/core/note_payload.py
Lars 2ddf034983
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
app/core/note_payload.py aktualisiert
2025-11-08 21:21:57 +01:00

161 lines
4.9 KiB
Python

# app/core/note_payload.py
# Line count: 118
from __future__ import annotations
from typing import Any, Dict, List, Optional, Union
# Public API:
# make_note_payload(note, *, retriever_weight: Optional[float] = None) -> Dict[str, Any]
#
# Anforderungen:
# - Akzeptiert sowohl ParsedNote-ähnliche Objekte (Attribute) als auch Dicts.
# - Liest Felder bevorzugt aus Frontmatter:
# id/title/type/tags/path (+ retriever_weight)
# - Fällt robust auf Note-Attribute zurück (note_id, title, type, tags, path).
# - Setzt retriever_weight nur, wenn vorhanden/angegeben (keine Defaults).
# - Gibt eine reine Payload (dict) zurück, die in Qdrant geschrieben werden kann.
def _get(obj: Any, key: str, default: Any = None) -> Any:
"""Robuste Getter-Funktion: erst Attribute, dann Dict-Keys."""
if obj is None:
return default
# Attribute
if hasattr(obj, key):
try:
val = getattr(obj, key)
return val if val is not None else default
except Exception:
pass
# Dict
if isinstance(obj, dict):
if key in obj:
val = obj.get(key, default)
return val if val is not None else default
return default
def _get_frontmatter(note: Any) -> Dict[str, Any]:
    """Return the frontmatter dict of ``note``, or an empty dict if none is found.

    Two locations are checked in order:
    1. ``note.frontmatter`` / ``note["frontmatter"]``
    2. ``meta["frontmatter"]`` where ``meta`` is a dict stored on the note
       (some parsers keep their metadata under "meta").
    """
    direct = _get(note, "frontmatter", None)
    if isinstance(direct, dict):
        return direct

    container = _get(note, "meta", None)
    nested = container.get("frontmatter") if isinstance(container, dict) else None
    # Fall back to an empty dict so callers can always treat the result as a dict.
    return nested if isinstance(nested, dict) else {}
def _get_from_frontmatter(fm: Dict[str, Any], key: str, default: Any = None) -> Any:
if not isinstance(fm, dict):
return default
if key in fm:
val = fm.get(key, default)
return val if val is not None else default
return default
def _coerce_tags(val: Any) -> List[str]:
if val is None:
return []
if isinstance(val, list):
return [str(x) for x in val]
if isinstance(val, str):
# YAML/Frontmatter kann tags als Komma-getrennte Zeichenkette liefern
parts = [t.strip() for t in val.split(",")]
return [p for p in parts if p]
return []
def _resolve_retriever_weight(
fm: Dict[str, Any],
explicit: Optional[float],
) -> Optional[float]:
# 1) explizit über Funktionsargument
if explicit is not None:
return explicit
# 2) im Frontmatter direkt
val = _get_from_frontmatter(fm, "retriever_weight", None)
if isinstance(val, (int, float)):
return float(val)
# 3) verschachtelt: frontmatter.retriever.weight
retr = fm.get("retriever")
if isinstance(retr, dict):
v = retr.get("weight")
if isinstance(v, (int, float)):
return float(v)
return None
def make_note_payload(
    note: Any,
    *,
    retriever_weight: Optional[float] = None,
) -> Dict[str, Any]:
    """Build the Qdrant payload dict for a single note.

    Each field is read from the note's frontmatter first and falls back to
    fields on the note itself (attribute or dict key). Only fields that
    resolve to a value are written; nothing is defaulted.

    Possible payload keys: ``id`` and ``note_id`` (same value), ``title``,
    ``type``, ``tags``, ``path``, ``text``, ``retriever_weight``.

    Args:
        note: Parsed note, either an attribute-style object or a dict.
        retriever_weight: Optional explicit weight; takes precedence over
            any weight found in the frontmatter.

    Returns:
        A plain dict containing the resolved fields.
    """
    fm = _get_frontmatter(note)

    def _lookup(fm_key: str, *note_keys: str) -> Any:
        # Frontmatter wins; otherwise the first non-None note field is used.
        found = _get_from_frontmatter(fm, fm_key, None)
        for attr in note_keys:
            if found is not None:
                break
            found = _get(note, attr, None)
        return found

    # ID priority: frontmatter.id > note.note_id > note.id
    note_id = _lookup("id", "note_id", "id")
    title = _lookup("title", "title")
    ntype = _lookup("type", "type")
    tags = _coerce_tags(_lookup("tags", "tags"))
    # Some parsers expose the path as "source" or "filepath".
    path = _lookup("path", "path", "source", "filepath")

    # Optional full text; dict-shaped notes may carry it as "body" or "content".
    text = _get(note, "text", None)
    if text is None and isinstance(note, dict):
        body = note.get("body")
        text = body if body else note.get("content")

    payload: Dict[str, Any] = {}
    if note_id is not None:
        # Both keys are written for backward compatibility.
        payload.update(id=note_id, note_id=note_id)
    for field, value in (("title", title), ("type", ntype)):
        if value is not None:
            payload[field] = value
    if tags:
        payload["tags"] = tags
    for field, value in (("path", path), ("text", text)):
        if value is not None:
            payload[field] = value

    rw = _resolve_retriever_weight(fm, retriever_weight)
    if rw is not None:
        payload["retriever_weight"] = rw
    return payload