scripts/import_markdown.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
c3815afcd4
commit
2c81fa0cb5
|
|
@ -243,6 +243,28 @@ def delete_note_everywhere(client, prefix: str, note_id: str) -> None:
|
|||
print(json.dumps({"note_id": note_id, "warn": f"delete in {col} failed: {e}"}))
|
||||
|
||||
|
||||
# --- Neu: Existenz-Checks für Artefakte (fehlertoleranter Rebuild) ---
|
||||
|
||||
def _has_any_point(client, collection: str, note_id: str) -> bool:
    """Report whether the collection holds at least one point for ``note_id``.

    Issues a single ``client.scroll`` request against ``collection``,
    filtered on the ``note_id`` field and capped at one result. Payloads
    and vectors are not requested because only the existence of a hit
    matters.

    Returns:
        True if the scroll returned any point, False otherwise.
    """
    note_condition = rest.FieldCondition(
        key="note_id",
        match=rest.MatchValue(value=note_id),
    )
    # scroll returns (points, next_offset); the offset is irrelevant here.
    hits, _next_offset = client.scroll(
        collection_name=collection,
        scroll_filter=rest.Filter(must=[note_condition]),
        with_payload=False,
        with_vectors=False,
        limit=1,
    )
    return True if hits else False
|
||||
|
||||
def artifacts_missing(client, prefix: str, note_id: str) -> Tuple[bool, bool]:
    """Return ``(chunks_missing, edges_missing)`` for the given note.

    The collection names come from ``collections(prefix)``, which yields
    three names; the first one is ignored here. Each flag is True when
    ``_has_any_point`` finds no point for ``note_id`` in the respective
    collection.
    """
    _unused_col, chunks_col, edges_col = collections(prefix)
    # Check chunks first, then edges (same order as the lookups are reported).
    has_chunks = _has_any_point(client, chunks_col, note_id)
    has_edges = _has_any_point(client, edges_col, note_id)
    return (not has_chunks, not has_edges)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------
|
||||
|
|
@ -425,7 +447,7 @@ def main() -> None:
|
|||
changed = args.force_replace or (not has_old) or hash_changed or text_changed
|
||||
do_baseline_only = (args.baseline_modes and has_old and needs_baseline and not changed)
|
||||
|
||||
# --- Chunks + Embeddings ---
|
||||
# --- Chunks + Embeddings vorbereiten ---
|
||||
try:
|
||||
body_text = getattr(parsed, "body", "") or ""
|
||||
chunks = assemble_chunks(fm["id"], body_text, fm.get("type", "concept"))
|
||||
|
|
@ -454,10 +476,14 @@ def main() -> None:
|
|||
except Exception as e:
|
||||
print(json.dumps({"path": path, "note_id": note_id, "warn": f"embed_texts failed, using zeros: {e}"}))
|
||||
|
||||
# --- Fehlende Artefakte in Qdrant ermitteln ---
|
||||
chunks_missing, edges_missing = artifacts_missing(client, cfg.prefix, note_id)
|
||||
|
||||
# --- Edges (robust) ---
|
||||
edges: List[Dict[str, Any]] = []
|
||||
edges_failed = False
|
||||
if changed and (not do_baseline_only):
|
||||
should_build_edges = (changed and (not do_baseline_only)) or edges_missing
|
||||
if should_build_edges:
|
||||
try:
|
||||
note_refs = note_pl.get("references") or []
|
||||
edges = build_edges_for_note(
|
||||
|
|
@ -479,10 +505,12 @@ def main() -> None:
|
|||
"edges": len(edges),
|
||||
"edges_failed": edges_failed,
|
||||
"changed": changed,
|
||||
"chunks_missing": chunks_missing,
|
||||
"edges_missing": edges_missing,
|
||||
"needs_baseline_for_mode": needs_baseline,
|
||||
"decision": ("baseline-only" if args.apply and do_baseline_only else
|
||||
"apply" if args.apply and changed else
|
||||
"apply-skip-unchanged" if args.apply and not changed else
|
||||
"apply" if args.apply and (changed or chunks_missing or edges_missing) else
|
||||
"apply-skip-unchanged" if args.apply and not (changed or chunks_missing or edges_missing) else
|
||||
"dry-run"),
|
||||
"path": note_pl["path"],
|
||||
"hash_mode": mode,
|
||||
|
|
@ -508,21 +536,29 @@ def main() -> None:
|
|||
upsert_batch(client, notes_name, note_pts)
|
||||
continue
|
||||
|
||||
if not changed:
|
||||
# Wenn nichts geändert und keine Artefakte fehlen → nichts zu tun
|
||||
if not changed and not (chunks_missing or edges_missing):
|
||||
continue
|
||||
|
||||
if args.purge_before_upsert and has_old:
|
||||
# Purge nur bei echten Änderungen (unverändert + fehlende Artefakte ≠ Purge)
|
||||
if args.purge_before_upsert and has_old and changed:
|
||||
try:
|
||||
purge_note_artifacts(client, cfg.prefix, note_id)
|
||||
except Exception as e:
|
||||
print(json.dumps({"path": path, "note_id": note_id, "warn": f"purge failed: {e}"}))
|
||||
|
||||
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, cfg.dim)
|
||||
upsert_batch(client, notes_name, note_pts)
|
||||
if chunk_pls:
|
||||
# Note nur bei Änderungen neu schreiben
|
||||
if changed:
|
||||
notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, cfg.dim)
|
||||
upsert_batch(client, notes_name, note_pts)
|
||||
|
||||
# Chunks schreiben, wenn geändert ODER vorher fehlend
|
||||
if chunk_pls and (changed or chunks_missing):
|
||||
chunks_name, chunk_pts = points_for_chunks(cfg.prefix, chunk_pls, vecs)
|
||||
upsert_batch(client, chunks_name, chunk_pts)
|
||||
if edges:
|
||||
|
||||
# Edges schreiben, wenn vorhanden und (geändert ODER vorher fehlend)
|
||||
if edges and (changed or edges_missing):
|
||||
edges_name, edge_pts = points_for_edges(cfg.prefix, edges)
|
||||
upsert_batch(client, edges_name, edge_pts)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user