scripts/import_markdown.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s

This commit is contained in:
Lars 2025-11-09 10:42:55 +01:00
parent c3815afcd4
commit 2c81fa0cb5

View File

@ -243,6 +243,28 @@ def delete_note_everywhere(client, prefix: str, note_id: str) -> None:
print(json.dumps({"note_id": note_id, "warn": f"delete in {col} failed: {e}"}))
# --- Neu: Existenz-Checks für Artefakte (fehlertoleranter Rebuild) ---
def _has_any_point(client, collection: str, note_id: str) -> bool:
"""Prüft, ob es mind. einen Punkt mit note_id in der Collection gibt."""
filt = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
pts, _ = client.scroll(
collection_name=collection,
scroll_filter=filt,
with_payload=False,
with_vectors=False,
limit=1,
)
return bool(pts)
def artifacts_missing(client, prefix: str, note_id: str) -> Tuple[bool, bool]:
"""Gibt (chunks_missing, edges_missing) zurück."""
_, chunks_col, edges_col = collections(prefix)
chunks_missing = not _has_any_point(client, chunks_col, note_id)
edges_missing = not _has_any_point(client, edges_col, note_id)
return chunks_missing, edges_missing
# ---------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------
@ -425,7 +447,7 @@ def main() -> None:
changed = args.force_replace or (not has_old) or hash_changed or text_changed
do_baseline_only = (args.baseline_modes and has_old and needs_baseline and not changed)
# --- Chunks + Embeddings ---
# --- Chunks + Embeddings vorbereiten ---
try:
body_text = getattr(parsed, "body", "") or ""
chunks = assemble_chunks(fm["id"], body_text, fm.get("type", "concept"))
@ -454,10 +476,14 @@ def main() -> None:
except Exception as e:
print(json.dumps({"path": path, "note_id": note_id, "warn": f"embed_texts failed, using zeros: {e}"}))
# --- Fehlende Artefakte in Qdrant ermitteln ---
chunks_missing, edges_missing = artifacts_missing(client, cfg.prefix, note_id)
# --- Edges (robust) ---
edges: List[Dict[str, Any]] = []
edges_failed = False
if changed and (not do_baseline_only):
should_build_edges = (changed and (not do_baseline_only)) or edges_missing
if should_build_edges:
try:
note_refs = note_pl.get("references") or []
edges = build_edges_for_note(
@ -479,10 +505,12 @@ def main() -> None:
"edges": len(edges),
"edges_failed": edges_failed,
"changed": changed,
"chunks_missing": chunks_missing,
"edges_missing": edges_missing,
"needs_baseline_for_mode": needs_baseline,
"decision": ("baseline-only" if args.apply and do_baseline_only else
"apply" if args.apply and changed else
"apply-skip-unchanged" if args.apply and not changed else
"apply" if args.apply and (changed or chunks_missing or edges_missing) else
"apply-skip-unchanged" if args.apply and not (changed or chunks_missing or edges_missing) else
"dry-run"),
"path": note_pl["path"],
"hash_mode": mode,
@ -508,21 +536,29 @@ def main() -> None:
upsert_batch(client, notes_name, note_pts)
continue
if not changed:
# Wenn nichts geändert und keine Artefakte fehlen → nichts zu tun
if not changed and not (chunks_missing or edges_missing):
continue
if args.purge_before_upsert and has_old:
# Purge nur bei echten Änderungen (unverändert + fehlende Artefakte ≠ Purge)
if args.purge_before_upsert and has_old and changed:
try:
purge_note_artifacts(client, cfg.prefix, note_id)
except Exception as e:
print(json.dumps({"path": path, "note_id": note_id, "warn": f"purge failed: {e}"}))
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, cfg.dim)
upsert_batch(client, notes_name, note_pts)
if chunk_pls:
# Note nur bei Änderungen neu schreiben
if changed:
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, cfg.dim)
upsert_batch(client, notes_name, note_pts)
# Chunks schreiben, wenn geändert ODER vorher fehlend
if chunk_pls and (changed or chunks_missing):
chunks_name, chunk_pts = points_for_chunks(cfg.prefix, chunk_pls, vecs)
upsert_batch(client, chunks_name, chunk_pts)
if edges:
# Edges schreiben, wenn vorhanden und (geändert ODER vorher fehlend)
if edges and (changed or edges_missing):
edges_name, edge_pts = points_for_edges(cfg.prefix, edges)
upsert_batch(client, edges_name, edge_pts)