scripts/export_markdown.py aktualisiert
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s

This commit is contained in:
Lars 2025-09-24 12:13:04 +02:00
parent 58bce0ea19
commit 5c937fbffe

View File

@ -1,249 +1,457 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Script: export_markdown.py Qdrant Markdown (Obsidian-kompatibel) Script: scripts/import_markdown.py Markdown Qdrant (Notes, Chunks, Edges)
Version: 1.4.0 Version: 3.7.0
Datum: 2025-09-09 Datum: 2025-09-10
Kurzbeschreibung Kurzbeschreibung
---------------- ----------------
Exportiert Markdown-Notizen aus Qdrant in einen Zielordner (Vault). - Liest Markdown-Dateien ein und erzeugt Notes/Chunks/Edges **idempotent**.
Rekonstruiert YAML-Frontmatter und Body. - Robuste Änderungserkennung: Mehrfach-Hashes werden parallel in der Note gespeichert
(Option C). Vergleich erfolgt **modusgenau** anhand von `hashes[<mode>:<source>:<normalize>]`.
Ein Wechsel des Vergleichsmodus führt so **nicht** zu Massenänderungen.
- **Erstimport-Fix:** Bei leerem Qdrant gilt Create-Fall automatisch als geändert.
- **--baseline-modes:** fehlende Hash-Varianten still nachtragen (nur Notes upserten).
- **--sync-deletes:** Punkte in Qdrant, die im Vault fehlen, sicher löschen (Dry-Run + Apply).
- **--prefix**: CLI-Override für COLLECTION_PREFIX.
Body-Rekonstruktions-Priorität (abwärtskompatibel): Hash/Compare Konfiguration
1) notes.payload.fulltext (verlustfrei, wenn beim Import gespeichert) --------------------------
2) ansonsten aus allen zugehörigen Chunks: payload.text payload.content payload.raw - Vergleichsmodus: `--hash-mode body|frontmatter|full` oder ENV `MINDNET_HASH_MODE|MINDNET_HASH_COMPARE`
in stabiler, sequentieller Reihenfolge (seq/chunk_index/ID-Nummer) - Quelle: `--hash-source parsed|raw` (ENV: MINDNET_HASH_SOURCE, Default parsed)
- Normalisierung: `--hash-normalize canonical|none` (ENV: MINDNET_HASH_NORMALIZE, Default canonical)
- Optional: `--compare-text` (oder `MINDNET_COMPARE_TEXT=true`) vergleicht zusätzlich den parsed Body-Text direkt.
Wichtige Fixes Qdrant / ENV
--------------
- **Pfad-Normalisierung**: erzwingt relative Pfade (entfernt führende '/', Backslashes → Slashes),
damit ``--out`` nicht ignoriert wird.
- **`--prefix` (optional)**: Überschreibt COLLECTION_PREFIX; ENV bleibt Default (rückwärtskompatibel).
ENV / Qdrant
------------ ------------
- QDRANT_URL (oder QDRANT_HOST/QDRANT_PORT) - QDRANT_URL | QDRANT_HOST/QDRANT_PORT | QDRANT_API_KEY
- QDRANT_API_KEY (optional) - COLLECTION_PREFIX (Default: mindnet), via `--prefix` überschreibbar
- COLLECTION_PREFIX (Default: mindnet) - VECTOR_DIM (Default: 384)
- MINDNET_NOTE_SCOPE_REFS: true|false (Default: false)
Aufruf
------
python3 -m scripts.export_markdown --out ./_exportVault
python3 -m scripts.export_markdown --out ./_exportVault --note-id 20250821-foo
python3 -m scripts.export_markdown --out ./_exportVault --overwrite
python3 -m scripts.export_markdown --out ./_exportVault --prefix mindnet_dev # optional
Beispiele Beispiele
--------- ---------
COLLECTION_PREFIX=mindnet QDRANT_URL=http://127.0.0.1:6333 \\ # Standard (Body, parsed, canonical)
python3 -m scripts.export_markdown --out ./_exportVault --overwrite python3 -m scripts.import_markdown --vault ./vault
"""
# Erstimport nach truncate (Create-Fall)
python3 -m scripts.import_markdown --vault ./vault --apply --purge-before-upsert
# Baseline für parsed:canonical „still“ auffüllen
MINDNET_HASH_SOURCE=parsed MINDNET_HASH_NORMALIZE=canonical \
python3 -m scripts.import_markdown --vault ./vault --apply --baseline-modes
# Frontmatter-Vergleich ohne Massenänderungen (nach Baseline)
MINDNET_HASH_COMPARE=Frontmatter \
python3 -m scripts.import_markdown --vault ./vault
# Sync-Deletes (Dry-Run → Apply)
python3 -m scripts.import_markdown --vault ./vault --sync-deletes
python3 -m scripts.import_markdown --vault ./vault --sync-deletes --apply
# Prefix explizit setzen
python3 -m scripts.import_markdown --vault ./vault --prefix mindnet
"""
from __future__ import annotations from __future__ import annotations
import argparse import argparse
import json import json
import os import os
import re import sys
from typing import Dict, Iterable, List, Optional, Tuple from typing import Dict, List, Optional, Tuple, Any, Set
import yaml from dotenv import load_dotenv
from qdrant_client.http import models as rest from qdrant_client.http import models as rest
from qdrant_client import QdrantClient
from app.core.qdrant import QdrantConfig, get_client, ensure_collections from app.core.parser import (
read_markdown,
normalize_frontmatter,
validate_required_frontmatter,
)
from app.core.note_payload import make_note_payload
from app.core.chunker import assemble_chunks
from app.core.chunk_payload import make_chunk_payloads
# Kompatibel zu beiden Modulnamen:
try:
from app.core.derive_edges import build_edges_for_note
except Exception: # pragma: no cover
from app.core.edges import build_edges_for_note # type: ignore
from app.core.qdrant import (
QdrantConfig,
get_client,
ensure_collections,
ensure_payload_indexes,
)
from app.core.qdrant_points import (
points_for_chunks,
points_for_note,
points_for_edges,
upsert_batch,
)
try:
from app.core.embed import embed_texts # optional
except Exception:
embed_texts = None
# ----------------------------------------------------------------------------- # ---------------------------------------------------------------------
# Utilities # Helper
# ----------------------------------------------------------------------------- # ---------------------------------------------------------------------
def _names(prefix: str) -> Tuple[str, str, str]: def iter_md(root: str) -> List[str]:
out: List[str] = []
for dirpath, _, filenames in os.walk(root):
for fn in filenames:
if not fn.lower().endswith(".md"):
continue
p = os.path.join(dirpath, fn)
pn = p.replace("\\", "/")
if any(ex in pn for ex in ["/.obsidian/", "/_backup_frontmatter/", "/_imported/"]):
continue
out.append(p)
return sorted(out)
def collections(prefix: str) -> Tuple[str, str, str]:
return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges" return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges"
def fetch_existing_note_payload(client, prefix: str, note_id: str) -> Optional[Dict]:
    """Look up the stored payload of a single note by its ``note_id``.

    Issues one ``scroll`` request (limit 1, payload only, no vectors) against
    the notes collection derived from ``prefix``.

    Args:
        client: Qdrant client object exposing ``scroll``.
        prefix: Collection prefix; the notes collection name comes from
            ``collections(prefix)``.
        note_id: Value matched against the ``note_id`` payload field.

    Returns:
        ``None`` when no point matches; otherwise the first match's payload,
        or an empty dict when that payload is missing/empty.
    """
    notes_collection = collections(prefix)[0]
    by_note_id = rest.FieldCondition(
        key="note_id",
        match=rest.MatchValue(value=note_id),
    )
    hits, _next_offset = client.scroll(
        collection_name=notes_collection,
        scroll_filter=rest.Filter(must=[by_note_id]),
        with_payload=True,
        with_vectors=False,
        limit=1,
    )
    if hits:
        # A point without payload is still "existing" -> report an empty dict.
        return hits[0].payload or {}
    return None
def _ensure_dir(path: str) -> None: def list_qdrant_note_ids(client, prefix: str) -> Set[str]:
d = os.path.dirname(path) """Liest alle note_ids aus der Notes-Collection (per Scroll)."""
if d and not os.path.isdir(d): notes_col, _, _ = collections(prefix)
os.makedirs(d, exist_ok=True) out: Set[str] = set()
def _normalize_rel_path(p: str) -> str:
"""Pfad relativ halten & normalisieren (slashes, führende / entfernen)."""
p = (p or "").replace("\\", "/")
return p.lstrip("/")
def _to_md(frontmatter: dict, body: str) -> str:
    """Render a Markdown document consisting of YAML frontmatter plus body.

    Args:
        frontmatter: Mapping dumped with ``yaml.safe_dump`` (keys not sorted,
            unicode allowed) and placed between two ``---`` fences.
        body: Note text; ``None``/empty is treated as an empty body.

    Returns:
        The document text. Trailing whitespace of the body is removed and the
        result always ends with exactly one newline after the body.
    """
    yaml_text = yaml.safe_dump(frontmatter, sort_keys=False, allow_unicode=True).strip()
    content = (body or "").rstrip()
    return "---\n" + yaml_text + "\n---\n" + content + "\n"
def _scroll_all(
client: QdrantClient,
col: str,
flt: Optional[rest.Filter] = None,
with_payload: bool = True,
with_vectors: bool = False,
limit: int = 256,
):
"""Scrollt durch alle Punkte einer Collection und liefert eine Liste mit Points."""
out = []
next_page = None next_page = None
while True: while True:
pts, next_page = client.scroll( pts, next_page = client.scroll(
collection_name=col, collection_name=notes_col,
scroll_filter=flt, with_payload=True,
with_payload=with_payload, with_vectors=False,
with_vectors=with_vectors, limit=256,
limit=limit,
offset=next_page, offset=next_page,
) )
if not pts: if not pts:
break break
out.extend(pts) for p in pts:
if not next_page: pl = p.payload or {}
nid = pl.get("note_id")
if isinstance(nid, str):
out.add(nid)
if next_page is None:
break break
return out return out
def purge_note_artifacts(client, prefix: str, note_id: str) -> None:
"""
Löscht alle Chunks & Edges zu einer Note mittels Filter-Selector (kompatibel mit aktuellen Qdrant-Clients).
"""
_, chunks_col, edges_col = collections(prefix)
filt = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
def _load_chunks_for_note(client: QdrantClient, chunks_col: str, note_id: str) -> List[dict]: try:
flt = rest.Filter(must=[rest.FieldCondition( client.delete(
key="note_id", collection_name=chunks_col,
match=rest.MatchValue(value=note_id), points_selector=rest.FilterSelector(filter=filt),
)]) wait=True
pts = _scroll_all(client, chunks_col, flt, with_payload=True, with_vectors=False) )
# Sortierung: bevorzugt seq → chunk_index → Nummer in id except Exception as e:
def _seq(pl: dict) -> Tuple[int, int, int]: print(json.dumps({"note_id": note_id, "warn": f"delete chunks via filter failed: {e}"}))
s1 = pl.get("seq", pl.get("chunk_index", -1))
s2 = pl.get("chunk_index", -1) try:
# Nummer-Anteil aus "noteid#<n>" client.delete(
s3 = 0 collection_name=edges_col,
points_selector=rest.FilterSelector(filter=filt),
wait=True
)
except Exception as e:
print(json.dumps({"note_id": note_id, "warn": f"delete edges via filter failed: {e}"}))
def delete_note_everywhere(client, prefix: str, note_id: str) -> None:
"""Löscht Note + zugehörige Chunks + Edges per Filter."""
notes_col, chunks_col, edges_col = collections(prefix)
filt = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
# Reihenfolge: erst edges/chunks, dann note
for col in (edges_col, chunks_col, notes_col):
try: try:
m = re.search(r"#(\\d+)$", pl.get("id") or "") client.delete(
if m: collection_name=col,
s3 = int(m.group(1)) points_selector=rest.FilterSelector(filter=filt),
except Exception: wait=True
pass )
return (int(s1) if isinstance(s1, int) else -1, int(s2) if isinstance(s2, int) else -1, s3) except Exception as e:
print(json.dumps({"note_id": note_id, "warn": f"delete in {col} failed: {e}"}))
pts_sorted = sorted(pts, key=lambda p: _seq(p.payload or {})) def _resolve_mode(val: Optional[str]) -> str:
return [p.payload or {} for p in pts_sorted] v = (val or os.environ.get("MINDNET_HASH_MODE") or os.environ.get("MINDNET_HASH_COMPARE") or "body").strip().lower()
if v in ("full", "fulltext", "body+frontmatter", "bodyplusfrontmatter"):
return "full"
if v in ("frontmatter", "fm"):
return "frontmatter"
return "body"
def _env(key: str, default: str) -> str:
return (os.environ.get(key) or default).strip().lower()
def _reconstruct_body(note_pl: dict, chunk_payloads: List[dict]) -> str: # ---------------------------------------------------------------------
# 1) Volltext vorhanden?
fulltext = note_pl.get("fulltext")
if isinstance(fulltext, str) and fulltext.strip():
return fulltext
# 2) Aus Chunks zusammensetzen: text → content → raw
parts: List[str] = []
for ch in chunk_payloads:
text = ch.get("text") or ch.get("content") or ch.get("raw")
if isinstance(text, str) and text.strip():
parts.append(text.rstrip())
return ("\n\n".join(parts)).rstrip() + ("\n" if parts else "")
def _export_one_note(
    client: QdrantClient,
    prefix: str,
    note_pl: dict,
    out_root: str,
    overwrite: bool,
) -> dict:
    """Export one note payload as a Markdown file below ``out_root``.

    Args:
        client: Qdrant client used to load the note's chunks.
        prefix: Collection prefix; the chunks collection name comes from
            ``_names(prefix)``.
        note_pl: Payload dict of the note (frontmatter fields, ``path``,
            optionally ``fulltext``).
        out_root: Root directory of the export vault.
        overwrite: When false, an already existing target file is kept.

    Returns:
        ``{"note_id", "path", "status"}`` with status ``"written"`` or
        ``"skip_exists"``.

    Raises:
        ValueError: If the payload carries no note id, or if the target path
            would end up outside ``out_root``.
    """
    _, chunks_col, _ = _names(prefix)
    note_id = note_pl.get("note_id") or note_pl.get("id")
    if not note_id:
        # Without an id there is neither a file name nor a chunk filter value.
        raise ValueError("note payload has neither 'note_id' nor 'id'")

    # Determine the target path and keep it relative to the export root.
    path = note_pl.get("path") or f"{note_id}.md"
    path = _normalize_rel_path(path)
    out_path = os.path.join(out_root, path).replace("\\", "/")

    # The path comes from stored data: refuse anything escaping the export root.
    root_abs = os.path.abspath(out_root)
    if os.path.commonpath([root_abs, os.path.abspath(out_path)]) != root_abs:
        raise ValueError(f"export path escapes output root: {path}")

    # Decide about skipping before doing any Qdrant round-trip for the body.
    if os.path.exists(out_path) and not overwrite:
        return {"note_id": note_id, "path": path, "status": "skip_exists"}

    # Rebuild frontmatter from the payload (known fields only); a plain key
    # takes precedence over its "note_"-prefixed variant.
    fm: Dict[str, object] = {}
    for k in [
        "title", "id", "type", "status", "created", "updated", "tags",
        "priority", "effort_min", "due", "people", "aliases",
        "depends_on", "assigned_to", "lang",
    ]:
        v = note_pl.get(k) if k in note_pl else note_pl.get(f"note_{k}")
        if v not in (None, [], ""):
            fm[k] = v

    # Minimum fields.
    if "id" not in fm:
        fm["id"] = note_id
    if "title" not in fm and note_pl.get("title"):
        fm["title"] = note_pl["title"]

    # Body: fulltext if stored, otherwise reassembled from the chunks.
    chunks = _load_chunks_for_note(client, chunks_col, note_id)
    body = _reconstruct_body(note_pl, chunks)

    _ensure_dir(out_path)
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(_to_md(fm, body))
    return {"note_id": note_id, "path": path, "status": "written"}
# -----------------------------------------------------------------------------
# Main # Main
# ----------------------------------------------------------------------------- # ---------------------------------------------------------------------
def main() -> None: def main() -> None:
load_dotenv()
ap = argparse.ArgumentParser() ap = argparse.ArgumentParser()
ap.add_argument("--out", required=True, help="Zielordner für den Export-Vault") ap.add_argument("--vault", required=True, help="Pfad zum Obsidian-Vault (Root-Ordner)")
ap.add_argument("--note-id", help="Nur eine Note exportieren (Note-ID)") ap.add_argument("--apply", action="store_true", help="Schreibt in Qdrant; ohne Flag nur Dry-Run")
ap.add_argument("--overwrite", action="store_true", help="Bestehende Dateien überschreiben") ap.add_argument("--purge-before-upsert", action="store_true",
ap.add_argument("--prefix", help="(Optional) überschreibt COLLECTION_PREFIX aus ENV") help="Vor Upsert Chunks & Edges der GEÄNDERTEN Note löschen")
ap.add_argument("--note-id", help="Nur eine bestimmte Note-ID verarbeiten")
ap.add_argument("--embed-note", action="store_true", help="Optional: Note-Volltext einbetten")
ap.add_argument("--force-replace", action="store_true",
help="Änderungserkennung ignorieren und immer upserten (+ optional Purge)")
ap.add_argument("--hash-mode", choices=["body", "frontmatter", "full"], default=None,
help="Vergleichsmodus (Body | Frontmatter | Full)")
ap.add_argument("--hash-normalize", choices=["canonical", "none"], default=None)
ap.add_argument("--hash-source", choices=["parsed", "raw"], default=None,
help="Quelle für die Hash-Berechnung (Default: parsed)")
ap.add_argument("--note-scope-refs", action="store_true",
help="(Optional) erzeugt zusätzlich references:note (Default: aus)")
ap.add_argument("--debug-hash-diff", action="store_true",
help="(reserviert) optionaler Body-Diff (nicht nötig bei Option C)")
ap.add_argument("--compare-text", action="store_true",
help="Parsed fulltext zusätzlich direkt vergleichen (über Hash hinaus)")
ap.add_argument("--baseline-modes", action="store_true",
help="Fehlende Hash-Varianten im Feld 'hashes' still nachtragen (Upsert NUR Notes)")
ap.add_argument("--sync-deletes", action="store_true",
help="Notes/Chunks/Edges löschen, die in Qdrant existieren aber im Vault fehlen (Dry-Run; mit --apply ausführen)")
ap.add_argument("--prefix", help="Collection-Prefix (überschreibt ENV COLLECTION_PREFIX)")
args = ap.parse_args() args = ap.parse_args()
# Qdrant-Konfiguration mode = _resolve_mode(args.hash_mode) # body|frontmatter|full
src = _env("MINDNET_HASH_SOURCE", args.hash_source or "parsed") # parsed|raw
norm = _env("MINDNET_HASH_NORMALIZE", args.hash_normalize or "canonical") # canonical|none
note_scope_refs_env = (_env("MINDNET_NOTE_SCOPE_REFS", "false") == "true")
note_scope_refs = args.note_scope_refs or note_scope_refs_env
compare_text = args.compare_text or (_env("MINDNET_COMPARE_TEXT", "false") == "true")
cfg = QdrantConfig.from_env() cfg = QdrantConfig.from_env()
if args.prefix: if args.prefix:
cfg.prefix = args.prefix # abwärtskompatibel: ENV bleibt Default cfg.prefix = args.prefix.strip()
client = get_client(cfg) client = get_client(cfg)
ensure_collections(client, cfg.prefix, cfg.dim) ensure_collections(client, cfg.prefix, cfg.dim)
ensure_payload_indexes(client, cfg.prefix)
notes_col, _, _ = _names(cfg.prefix) root = os.path.abspath(args.vault)
files = iter_md(root)
if not files:
print("Keine Markdown-Dateien gefunden.", file=sys.stderr)
sys.exit(2)
# Notes holen (optional gefiltert) # Optional: Sync-Deletes vorab
flt = None if args.sync_deletes:
if args.note_id: # Vault-Note-IDs sammeln
flt = rest.Filter(must=[rest.FieldCondition( vault_note_ids: Set[str] = set()
key="note_id", for path in files:
match=rest.MatchValue(value=args.note_id), try:
)]) parsed = read_markdown(path)
if not parsed:
continue
fm = normalize_frontmatter(parsed.frontmatter)
nid = fm.get("id")
if isinstance(nid, str):
vault_note_ids.add(nid)
except Exception:
continue
# Qdrant-Note-IDs sammeln
qdrant_note_ids = list_qdrant_note_ids(client, cfg.prefix)
to_delete = sorted(qdrant_note_ids - vault_note_ids)
print(json.dumps({
"action": "sync-deletes",
"prefix": cfg.prefix,
"qdrant_total": len(qdrant_note_ids),
"vault_total": len(vault_note_ids),
"to_delete_count": len(to_delete),
"to_delete": to_delete[:50] + ([""] if len(to_delete) > 50 else [])
}, ensure_ascii=False))
if args.apply and to_delete:
for nid in to_delete:
print(json.dumps({"action": "delete", "note_id": nid, "decision": "apply"}))
delete_note_everywhere(client, cfg.prefix, nid)
# Danach normal mit Import fortfahren (z. B. neue Vault-Notes anlegen)
note_pts = _scroll_all(client, notes_col, flt, with_payload=True, with_vectors=False) key_current = f"{mode}:{src}:{norm}"
if not note_pts:
print(json.dumps({"exported": 0, "out": args.out, "message": "Keine Notes gefunden."}, ensure_ascii=False))
return
results = [] processed = 0
for p in note_pts: for path in files:
pl = p.payload or {} # -------- Parse & Validate --------
try: try:
res = _export_one_note(client, cfg.prefix, pl, args.out, args.overwrite) parsed = read_markdown(path)
except Exception as e: except Exception as e:
res = {"note_id": pl.get("note_id") or pl.get("id"), "error": str(e)} print(json.dumps({"path": path, "error": f"read_markdown failed: {e}"}))
results.append(res) continue
if parsed is None:
print(json.dumps({"path": path, "error": "read_markdown returned None"}))
continue
print(json.dumps({ try:
"exported": len([r for r in results if r.get("status") == "written"]), fm = normalize_frontmatter(parsed.frontmatter)
"skipped": len([r for r in results if r.get("status") == "skip_exists"]), validate_required_frontmatter(fm)
"out": args.out, except Exception as e:
"details": results, print(json.dumps({"path": path, "error": f"Frontmatter invalid: {e}"}))
}, ensure_ascii=False)) continue
if args.note_id and fm.get("id") != args.note_id:
continue
processed += 1
# -------- Build new payload (includes 'hashes') --------
note_pl = make_note_payload(
parsed,
vault_root=root,
hash_mode=mode,
hash_normalize=norm,
hash_source=src,
file_path=path,
)
if not note_pl.get("fulltext"):
note_pl["fulltext"] = getattr(parsed, "body", "") or ""
note_id = note_pl.get("note_id") or fm.get("id")
if not note_id:
print(json.dumps({"path": path, "error": "Missing note_id after payload build"}))
continue
# -------- Fetch old payload --------
old_payload = None if args.force_replace else fetch_existing_note_payload(client, cfg.prefix, note_id)
has_old = old_payload is not None
old_hashes = (old_payload or {}).get("hashes") or {}
old_hash_exact = old_hashes.get(key_current)
# Neu-Hash (aktueller Modus) aus neuem Payload
new_hash_exact = (note_pl.get("hashes") or {}).get(key_current)
needs_baseline = (old_hash_exact is None)
# Change-Detection:
# - CREATE: wenn es KEIN Alt-Payload gibt -> changed=True
# - UPDATE: baseline existiert UND Hash differiert
# - force/text_changed wie gehabt
hash_changed = (old_hash_exact is not None and new_hash_exact is not None and old_hash_exact != new_hash_exact)
text_changed = False
if compare_text:
old_text = (old_payload or {}).get("fulltext") or ""
new_text = note_pl.get("fulltext") or ""
text_changed = (old_text != new_text)
changed = args.force_replace or (not has_old) or hash_changed or text_changed
# Baseline-only nur, wenn Alt-Payload existiert UND Key fehlt UND keine sonstige Änderung
do_baseline_only = (args.baseline_modes and has_old and needs_baseline and not changed)
# -------- Optional: Chunks / Embeddings / Edges vorbereiten --------
# CREATE/UPDATE: wir brauchen Chunks/Edges; Baseline-only: nein
chunks = []
chunk_pls = []
edges = []
vecs = []
if (args.apply and (changed and (not do_baseline_only))):
try:
chunks = assemble_chunks(fm["id"], getattr(parsed, "body", "") or "", fm.get("type", "concept"))
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks)
except Exception as e:
print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {e}"}))
continue
if embed_texts:
try:
vecs = embed_texts([getattr(c, "text", "") for c in chunks]) # type: ignore[attr-defined]
except Exception as e:
print(json.dumps({"path": path, "note_id": note_id, "warn": f"embed_texts failed, using zeros: {e}"}))
vecs = [[0.0] * cfg.dim for _ in chunks]
else:
vecs = [[0.0] * cfg.dim for _ in chunks]
try:
note_refs = note_pl.get("references") or []
edges = build_edges_for_note(
note_id,
chunk_pls,
note_refs,
include_note_scope_refs=note_scope_refs,
)
except Exception as e:
print(json.dumps({"path": path, "note_id": note_id, "error": f"build_edges_for_note failed: {e}"}))
continue
# -------- Summary --------
summary = {
"note_id": note_id,
"title": fm.get("title"),
"chunks": len(chunk_pls),
"edges": len(edges),
"changed": changed,
"needs_baseline_for_mode": needs_baseline,
"decision": ("baseline-only" if args.apply and do_baseline_only else
"apply" if args.apply and changed else
"apply-skip-unchanged" if args.apply and not changed else
"dry-run"),
"path": note_pl["path"],
"hash_mode": mode,
"hash_normalize": norm,
"hash_source": src,
"prefix": cfg.prefix,
}
print(json.dumps(summary, ensure_ascii=False))
# -------- Writes --------
if not args.apply:
continue
# BASELINE-ONLY: fehlenden Key nachtragen, ohne legacy Felder anzutasten
if do_baseline_only:
merged_hashes = {}
merged_hashes.update(old_hashes)
merged_hashes.update(note_pl.get("hashes") or {})
# Legacy-Hashfelder unverändert lassen, falls vorhanden
if old_payload:
note_pl["hash_fulltext"] = old_payload.get("hash_fulltext", note_pl.get("hash_fulltext"))
note_pl["hash_signature"] = old_payload.get("hash_signature", note_pl.get("hash_signature"))
note_pl["hashes"] = merged_hashes
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, cfg.dim)
upsert_batch(client, notes_name, note_pts)
continue
# Normale CREATE/UPDATE
if not changed:
continue
if args.purge_before_upsert and has_old:
try:
purge_note_artifacts(client, cfg.prefix, note_id)
except Exception as e:
print(json.dumps({"path": path, "note_id": note_id, "warn": f"purge failed: {e}"}))
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, cfg.dim)
upsert_batch(client, notes_name, note_pts)
chunks_name, chunk_pts = points_for_chunks(cfg.prefix, chunk_pls, vecs)
upsert_batch(client, chunks_name, chunk_pts)
edges_name, edge_pts = points_for_edges(cfg.prefix, edges)
upsert_batch(client, edges_name, edge_pts)
print(f"Done. Processed notes: {processed}")
if __name__ == "__main__": if __name__ == "__main__":