scripts/import_markdown.py aktualisiert
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s
This commit is contained in:
parent
3e888cc669
commit
58bce0ea19
|
|
@@ -2,7 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Script: scripts/import_markdown.py — Markdown → Qdrant (Notes, Chunks, Edges)
|
||||
Version: 3.6.1
|
||||
Version: 3.6.2
|
||||
Datum: 2025-09-09
|
||||
|
||||
Kurzbeschreibung
|
||||
|
|
@@ -11,6 +11,8 @@ Kurzbeschreibung
|
|||
- Robuste Änderungserkennung: Mehrfach-Hashes werden parallel in der Note gespeichert
|
||||
(Option C). Vergleich erfolgt **modusgenau** anhand von `hashes[<mode>:<source>:<normalize>]`.
|
||||
Ein Wechsel des Vergleichsmodus führt so **nicht** zu Massenänderungen.
|
||||
- **Fix (v3.6.2):** Bei **erstem Import** (kein Alt-Payload) wird die Note als **geändert**
|
||||
behandelt → Create (Notes/Chunks/Edges) findet zuverlässig statt.
|
||||
- Baseline-Modus: Mit `--baseline-modes` werden **fehlende** Hash-Varianten
|
||||
im Feld `hashes` „still“ nachgetragen (Upsert NUR Notes; Legacy-Hashfelder bleiben unangetastet).
|
||||
|
||||
|
|
@@ -257,6 +259,8 @@ def main() -> None:
|
|||
|
||||
# -------- Fetch old payload --------
|
||||
old_payload = None if args.force_replace else fetch_existing_note_payload(client, cfg.prefix, note_id)
|
||||
has_old = old_payload is not None
|
||||
|
||||
old_hashes = (old_payload or {}).get("hashes") or {}
|
||||
old_hash_exact = old_hashes.get(key_current)
|
||||
|
||||
|
|
@@ -265,8 +269,10 @@ def main() -> None:
|
|||
|
||||
needs_baseline = (old_hash_exact is None)
|
||||
|
||||
# Change-Detection: nur wenn Baseline existiert und Hash differiert,
|
||||
# oder wenn force/text_changed explizit Änderungen anzeigt.
|
||||
# Change-Detection:
|
||||
# - CREATE: wenn es KEIN Alt-Payload gibt -> changed=True
|
||||
# - UPDATE: baseline existiert UND Hash differiert
|
||||
# - force/text_changed wie gehabt
|
||||
hash_changed = (old_hash_exact is not None and new_hash_exact is not None and old_hash_exact != new_hash_exact)
|
||||
|
||||
text_changed = False
|
||||
|
|
@@ -275,18 +281,18 @@ def main() -> None:
|
|||
new_text = note_pl.get("fulltext") or ""
|
||||
text_changed = (old_text != new_text)
|
||||
|
||||
changed = args.force_replace or hash_changed or text_changed
|
||||
changed = args.force_replace or (not has_old) or hash_changed or text_changed
|
||||
|
||||
# Soll Baseline „still“ ergänzt werden?
|
||||
do_baseline_only = (args.baseline_modes and needs_baseline and not changed)
|
||||
# Baseline-only nur, wenn Alt-Payload existiert UND Key fehlt UND keine sonstige Änderung
|
||||
do_baseline_only = (args.baseline_modes and has_old and needs_baseline and not changed)
|
||||
|
||||
# -------- Optional: Chunks / Embeddings / Edges vorbereiten --------
|
||||
# Nur notwendig, wenn wir tatsächlich Änderungen schreiben (nicht für baseline-only).
|
||||
# CREATE/UPDATE: wir brauchen Chunks/Edges; Baseline-only: nein
|
||||
chunks = []
|
||||
chunk_pls = []
|
||||
edges = []
|
||||
vecs = []
|
||||
if (changed and args.apply):
|
||||
if (args.apply and (changed and (not do_baseline_only))):
|
||||
try:
|
||||
chunks = assemble_chunks(fm["id"], getattr(parsed, "body", "") or "", fm.get("type", "concept"))
|
||||
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks)
|
||||
|
|
@@ -352,11 +358,11 @@ def main() -> None:
|
|||
upsert_batch(client, notes_name, note_pts)
|
||||
continue
|
||||
|
||||
# Normale Änderungen schreiben (Notes + Chunks + Edges)
|
||||
# Normale CREATE/UPDATE
|
||||
if not changed:
|
||||
continue
|
||||
|
||||
if args.purge_before_upsert:
|
||||
if args.purge_before_upsert and has_old:
|
||||
try:
|
||||
purge_note_artifacts(client, cfg.prefix, note_id)
|
||||
except Exception as e:
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user