#!/usr/bin/env python3
"""Derive link edges from the Markdown notes of a vault and upsert them into Qdrant.

Usage: ``--vault PATH [--exclude SUBSTR ...]``.

On completion a single JSON object ``{"edges_upserted": N}`` is written to
stdout. Per-file skip diagnostics are written to stderr so that stdout stays
machine-readable.
"""
from __future__ import annotations

import argparse
import glob
import json
import os
import sys

from app.core.parser import read_markdown, build_note_payload
from app.core.validate_note import validate_note_payload
from app.core.qdrant import QdrantConfig, get_client, ensure_collections
from app.core.qdrant_points import points_for_edges, upsert_batch
from app.core.derive_edges import build_note_index, derive_wikilink_edges


def iter_notes(vault: str, excludes: list[str]) -> list[tuple[dict, list[dict]]]:
    """Parse every ``*.md`` file below *vault* into a (payload, chunks) pair.

    Args:
        vault: Root directory that is searched recursively for ``*.md`` files.
        excludes: Substrings; a file is ignored when any of them occurs in
            its path.

    Returns:
        One ``(note_payload, [chunk])`` tuple per note that could be read,
        built and validated. Each note gets exactly one placeholder chunk
        holding the whole body.

    Files that raise during reading, payload construction or validation are
    reported on stderr and skipped (deliberate best-effort behaviour).
    """
    # glob returns paths in filesystem-dependent order; sorting makes the
    # processing order (and therefore the output) reproducible across runs.
    files = sorted(glob.glob(os.path.join(vault, "**/*.md"), recursive=True))

    notes: list[tuple[dict, list[dict]]] = []
    for path in files:
        if any(ex in path for ex in excludes):
            continue
        try:
            fm, body = read_markdown(path)
            payload = build_note_payload(path, fm, body)
            validate_note_payload(payload)  # raises if the payload is invalid
            # Placeholder chunk: if the importer already produces real chunks,
            # use those instead. Here a single-chunk fallback (the whole body)
            # is enough to find the links.
            chunk = {
                "chunk_id": f"{payload['note_id']}#1",
                "note_id": payload["note_id"],
                "text": body,
            }
        except Exception as e:
            # Diagnostics go to stderr; stdout is reserved for the JSON result.
            print(f"skip {path}: {e}", file=sys.stderr)
            continue
        notes.append((payload, [chunk]))
    return notes


def main() -> None:
    """Command-line entry point: parse notes, derive edges, upsert them."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--vault", required=True)
    ap.add_argument(
        "--exclude",
        nargs="*",
        default=["/.obsidian/", "/_backup_frontmatter/"],
    )
    args = ap.parse_args()

    cfg = QdrantConfig.from_env()
    client = get_client(cfg)
    ensure_collections(client, cfg.prefix, cfg.dim)

    # 1) Parse all notes and build the index over their payloads.
    note_tuples = iter_notes(args.vault, args.exclude)
    note_payloads = [payload for payload, _ in note_tuples]
    idx = build_note_index(note_payloads)

    # 2) Derive the edges of every note.
    all_edges = []
    for note_payload, chunks in note_tuples:
        all_edges.extend(derive_wikilink_edges(note_payload, chunks, idx))

    # 3) Upsert the edge points and report how many were sent.
    edges_col, edge_pts = points_for_edges(cfg.prefix, all_edges)
    upsert_batch(client, edges_col, edge_pts)
    print(json.dumps({"edges_upserted": len(edge_pts)}, ensure_ascii=False))


if __name__ == "__main__":
    main()