scripts/import_markdown.py aktualisiert
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 1s
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 1s
This commit is contained in:
parent
364502244a
commit
12dd67fbb5
|
|
@@ -2,7 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Name: scripts/import_markdown.py
|
||||
Version: v2.2.0 (2025-09-05)
|
||||
Version: v2.2.1 (2025-09-05)
|
||||
Kurzbeschreibung:
|
||||
Importiert Obsidian-Markdown-Notes in Qdrant (Notes/Chunks/Edges).
|
||||
- Leitet Wikilink-Edges (references/backlink/references_at) direkt aus Volltext + echten Chunk-Texten ab.
|
||||
|
|
@@ -13,11 +13,11 @@ Aufruf (aus Projekt-Root, im venv):
|
|||
python3 -m scripts.import_markdown --vault ./vault [--apply] [--note-id NOTE_ID] [--embed-note] [--force-replace]
|
||||
|
||||
Parameter:
|
||||
--vault Pfad zum Vault (z. B. ./vault)
|
||||
--apply Führt Upserts in Qdrant aus (ohne Flag = Dry-Run mit JSON-Summaries)
|
||||
--note-id Bearbeite nur eine konkrete Note-ID
|
||||
--embed-note Optional: Note-Vektor (Volltext) zusätzlich einbetten
|
||||
--force-replace Erzwingt Purge & Neuaufbau auch ohne Hash-Änderung (Debug)
|
||||
--vault Pfad zum Vault (z. B. ./vault)
|
||||
--apply Führt Upserts in Qdrant aus (ohne Flag = Dry-Run mit JSON-Summaries)
|
||||
--note-id Bearbeite nur eine konkrete Note-ID
|
||||
--embed-note Optional: Note-Vektor (Volltext) zusätzlich einbetten
|
||||
--force-replace Erzwingt Purge & Neuaufbau auch ohne Hash-Änderung (Debug)
|
||||
|
||||
Umgebungsvariablen (optional):
|
||||
QDRANT_URL, QDRANT_API_KEY, COLLECTION_PREFIX, VECTOR_DIM (Default 384)
|
||||
|
|
@@ -32,14 +32,16 @@ Hinweise:
|
|||
* Chunks: payload.note_id == NOTE_ID
|
||||
* Edges : (source_id == NOTE_ID) OR (target_id == NOTE_ID) OR (source_id startswith NOTE_ID + "#")
|
||||
- Notes/Chunks/Edges bleiben 1:1 kompatibel zu Validator & Backfill.
|
||||
|
||||
Changelog:
|
||||
v2.2.1: Bugfix Tippfehler (args.force_replace statt args.force_replaces).
|
||||
v2.2.0: Hash-basierte Replace-on-Change-Logik; Purge pro Note; Skip unverändert.
|
||||
v2.1.1: Sicherstellung references_at durch Übergabe echter Chunk-Texte.
|
||||
v2.1.0: Vorab-Note-Index über Vault; direkte Edge-Ableitung.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import argparse, os, glob, json, sys, hashlib
|
||||
from typing import List, Dict, Tuple
|
||||
from typing import List, Dict
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from qdrant_client import QdrantClient
|
||||
|
|
@@ -159,7 +161,7 @@ def main():
|
|||
index_payloads.append(pl)
|
||||
except Exception:
|
||||
continue
|
||||
note_index = build_note_index(index_payloads) # by_id/by_slug/by_file_slug :contentReference[oaicite:3]{index=3}
|
||||
note_index = build_note_index(index_payloads) # by_id/by_slug/by_file_slug
|
||||
|
||||
notes_col = f"{cfg.prefix}_notes"
|
||||
total_notes = 0
|
||||
|
|
@@ -181,7 +183,7 @@ def main():
|
|||
note_pl = make_note_payload(parsed, vault_root=root)
|
||||
validate_note_payload(note_pl)
|
||||
h = compute_hash_fulltext(parsed.body)
|
||||
note_pl["hash_fulltext"] = h # im Schema vorgesehen :contentReference[oaicite:4]{index=4}
|
||||
note_pl["hash_fulltext"] = h
|
||||
|
||||
# Chunks + Payloads
|
||||
chunks = assemble_chunks(fm["id"], parsed.body, fm.get("type", "concept"))
|
||||
|
|
@@ -194,7 +196,7 @@ def main():
|
|||
# Optional: Note-Vektor
|
||||
note_vec = embed_one(parsed.body) if args.embed_note else None
|
||||
|
||||
# Edges (aus Volltext + echten Chunk-Texten) :contentReference[oaicite:5]{index=5}
|
||||
# Edges (aus Volltext + echten Chunk-Texten)
|
||||
note_pl_for_edges = {"note_id": fm["id"], "title": fm.get("title"), "path": note_pl["path"], "fulltext": parsed.body}
|
||||
chunks_for_links = []
|
||||
for i, pl in enumerate(chunk_pls):
|
||||
|
|
@@ -205,7 +207,7 @@ def main():
|
|||
|
||||
# Bestehende Note laden (für Hash-Vergleich)
|
||||
existing = fetch_existing_note_payload(client, notes_col, fm["id"])
|
||||
changed = args.force_replaces if False else False # placeholder, fixed below
|
||||
changed = False
|
||||
if existing and isinstance(existing, dict):
|
||||
old_h = existing.get("hash_fulltext")
|
||||
changed = (old_h != h)
|
||||
|
|
@@ -218,7 +220,7 @@ def main():
|
|||
"title": fm["title"],
|
||||
"chunks": len(chunk_pls),
|
||||
"edges": len(edges),
|
||||
"changed": changed or args.force_replaces,
|
||||
"changed": changed or args.force_replace,
|
||||
"path": note_pl["path"],
|
||||
}, ensure_ascii=False))
|
||||
|
||||
|
|
@@ -226,7 +228,7 @@ def main():
|
|||
continue
|
||||
|
||||
# Replace-on-Change: vorherige Artefakte der Note löschen
|
||||
if changed or args.force_replaces:
|
||||
if changed or args.force_replace:
|
||||
purge_note(client, cfg, fm["id"])
|
||||
|
||||
# Upserts
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user