From 2ddf0349834ebf7125b857954c7e0bded88a950d Mon Sep 17 00:00:00 2001 From: Lars Date: Sat, 8 Nov 2025 21:21:57 +0100 Subject: [PATCH] app/core/note_payload.py aktualisiert --- app/core/note_payload.py | 204 +++++++++++++++++++++++++++------------ 1 file changed, 143 insertions(+), 61 deletions(-) diff --git a/app/core/note_payload.py b/app/core/note_payload.py index b16fb50..697a10c 100644 --- a/app/core/note_payload.py +++ b/app/core/note_payload.py @@ -1,78 +1,160 @@ +# app/core/note_payload.py +# Line count: 118 + from __future__ import annotations -from typing import Any, Dict, Optional -def _coerce_float(val: Any) -> Optional[float]: +from typing import Any, Dict, List, Optional, Union + +# Public API: +# make_note_payload(note, *, retriever_weight: Optional[float] = None) -> Dict[str, Any] +# +# Anforderungen: +# - Akzeptiert sowohl ParsedNote-ähnliche Objekte (Attribute) als auch Dicts. +# - Liest Felder bevorzugt aus Frontmatter: +# id/title/type/tags/path (+ retriever_weight) +# - Fällt robust auf Note-Attribute zurück (note_id, title, type, tags, path). +# - Setzt retriever_weight nur, wenn vorhanden/angegeben (keine Defaults). +# - Gibt eine reine Payload (dict) zurück, die in Qdrant geschrieben werden kann. 
def _get(obj: Any, key: str, default: Any = None) -> Any:
    """Return ``obj.<key>`` or ``obj[key]``, whichever yields a non-None value.

    Works for attribute-style notes and for dicts. ``None`` values are
    treated as missing, so ``default`` is returned for them.
    """
    if obj is None:
        return default
    is_dict = isinstance(obj, dict)
    # A dict exposes its own methods ("items", "keys", "get", ...) as
    # attributes; those must never be mistaken for note data.
    if not (is_dict and hasattr(dict, key)):
        try:
            val = getattr(obj, key, None)
        except Exception:
            # Best-effort getter: a property that raises counts as missing.
            val = None
        if val is not None:
            return val
    if is_dict:
        val = obj.get(key)
        if val is not None:
            return val
    return default


def _get_frontmatter(note: Any) -> Dict[str, Any]:
    """Return the note's frontmatter dict, or an empty dict if there is none.

    Looks at ``note.frontmatter`` first and then at ``note.meta["frontmatter"]``.
    """
    fm = _get(note, "frontmatter", None)
    if isinstance(fm, dict):
        return fm
    meta = _get(note, "meta", None)
    if isinstance(meta, dict) and isinstance(meta.get("frontmatter"), dict):
        return meta["frontmatter"]
    return {}


def _get_from_frontmatter(fm: Dict[str, Any], key: str, default: Any = None) -> Any:
    """Return ``fm[key]`` unless it is missing or None, else ``default``."""
    if not isinstance(fm, dict):
        return default
    val = fm.get(key)
    return default if val is None else val


def _coerce_tags(val: Any) -> List[str]:
    """Normalise tags to a list of non-empty, stripped strings.

    Accepts a comma-separated string, a list/tuple (order kept) or a
    set/frozenset (sorted for a deterministic result). Anything else
    yields an empty list.
    """
    if val is None:
        return []
    if isinstance(val, str):
        items = val.split(",")
    elif isinstance(val, (list, tuple)):
        items = val
    elif isinstance(val, (set, frozenset)):
        items = sorted(val, key=str)
    else:
        return []
    stripped = (str(item).strip() for item in items if item is not None)
    return [tag for tag in stripped if tag]


def _coerce_weight(val: Any) -> Optional[float]:
    """Convert a number or numeric string to a finite float, else None.

    Strings may use a decimal comma ("0,8"). NaN and infinities are
    rejected.
    """
    import math

    if isinstance(val, str):
        val = val.strip().replace(",", ".")
    elif not isinstance(val, (int, float)):
        return None
    try:
        number = float(val)
    except (ValueError, OverflowError):
        return None
    return number if math.isfinite(number) else None


def _resolve_retriever_weight(
    fm: Dict[str, Any],
    explicit: Optional[float],
) -> Optional[float]:
    """Pick the retriever weight from the first usable source.

    Order: the explicit argument, ``fm["retriever_weight"]``, then
    ``fm["retriever"]["weight"]``. A candidate that cannot be converted
    to a finite float is skipped. Returns None when nothing is usable.
    """
    candidates = [explicit]
    if isinstance(fm, dict):
        candidates.append(fm.get("retriever_weight"))
        nested = fm.get("retriever")
        if isinstance(nested, dict):
            candidates.append(nested.get("weight"))
    for candidate in candidates:
        weight = _coerce_weight(candidate)
        if weight is not None:
            return weight
    return None


def _first_present(fm: Dict[str, Any], note: Any, fm_key: str, note_keys: Any) -> Any:
    """Return the frontmatter value for ``fm_key``, else the first non-None note field."""
    val = _get_from_frontmatter(fm, fm_key, None)
    if val is not None:
        return val
    for key in note_keys:
        val = _get(note, key, None)
        if val is not None:
            return val
    return None


def make_note_payload(
    note: Any,
    *,
    retriever_weight: Optional[float] = None,
) -> Dict[str, Any]:
    """Build the note-level payload dict for a note.

    Args:
        note: An object with attributes or a dict. Fields are read from
            its frontmatter first and from the note itself second.
        retriever_weight: Optional explicit weight; takes precedence over
            any weight found in the frontmatter or on the note.

    Returns:
        A dict containing only the fields that were found: ``id`` and
        ``note_id`` (same value), ``title``, ``type``, ``tags``, ``path``,
        ``text`` and ``retriever_weight``. No defaults are invented.
    """
    fm = _get_frontmatter(note)

    # ID priority: frontmatter.id > note.note_id > note.id
    note_id = _first_present(fm, note, "id", ("note_id", "id"))
    title = _first_present(fm, note, "title", ("title",))
    ntype = _first_present(fm, note, "type", ("type",))
    tags = _coerce_tags(_first_present(fm, note, "tags", ("tags",)))
    # Besides "path", the note itself is also checked for "source" / "filepath".
    path = _first_present(fm, note, "path", ("path", "source", "filepath"))

    # Optional full text; "body" / "content" are fallbacks for both dict
    # and attribute-style notes.
    text = _get(note, "text", None)
    if text is None:
        text = _get(note, "body", None) or _get(note, "content", None)

    payload: Dict[str, Any] = {}
    if note_id is not None:
        # Both keys are written for backward compatibility.
        payload["id"] = note_id
        payload["note_id"] = note_id
    if title is not None:
        payload["title"] = title
    if ntype is not None:
        payload["type"] = ntype
    if tags:
        payload["tags"] = tags
    if path is not None:
        payload["path"] = path
    if text is not None:
        payload["text"] = text

    rw = _resolve_retriever_weight(fm, retriever_weight)
    if rw is None:
        # Like every other field, fall back to the note itself. This also
        # covers a flat frontmatter dict passed directly as the note.
        note_level = {
            "retriever_weight": _get(note, "retriever_weight", None),
            "retriever": _get(note, "retriever", None),
        }
        rw = _resolve_retriever_weight(note_level, None)
    if rw is not None:
        payload["retriever_weight"] = rw

    return payload