#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Modul: scripts/export_markdown.py Version: 1.6.1 Datum: 2025-11-07 Zweck ----- Exportiert Notes aus Qdrant zurück in Markdown-Dateien (verlustarm): • Pfade relativieren, Backslashes → Slashes • Body aus 'fulltext' (falls vorhanden) oder Rekonstruktion via Chunks (seq/chunk_index) • Optional: vorhandene Edges pro Note mit exportieren (--include-edges yaml|footer) CLI --- export COLLECTION_PREFIX="mindnet" python3 -m scripts.export_markdown --out ./_exportVault python3 -m scripts.export_markdown --out ./_exportVault --note-id python3 -m scripts.export_markdown --out ./_exportVault --overwrite python3 -m scripts.export_markdown --out ./_exportVault --include-edges yaml python3 -m scripts.export_markdown --out ./_exportVault --include-edges footer Parameter --------- --out Zielwurzel (Ordner wird angelegt) --prefix überschreibt ENV COLLECTION_PREFIX (Default: mindnet) --note-id nur eine bestimmte Note exportieren --overwrite vorhandene Dateien überschreiben --include-edges none|yaml|footer (Default: none) """ from __future__ import annotations import argparse import os import json from pathlib import Path from typing import Dict, List, Tuple, Optional from app.core.qdrant import ( QdrantConfig, get_client, fetch_all_notes, fetch_chunks_for_note, fetch_edges_for_note, # <— jetzt angebunden ensure_collections, ) def _normalize_rel_path(p: str) -> str: p = (p or "").replace("\\", "/") while p.startswith("/"): p = p[1:] return p def _ensure_parent(p: Path): p.parent.mkdir(parents=True, exist_ok=True) def _yaml_frontmatter(d: Dict) -> str: import io def _ser(obj): if isinstance(obj, str): if any(ch in obj for ch in [":", "-", "{", "}", "[", "]", ",", "#", "&", "*", "!", "|", ">", "'", "\"", "%", "@", "`"]): return '"' + obj.replace('"', '\\"') + '"' return obj if isinstance(obj, bool): return "true" if obj else "false" if obj is None: return "null" if isinstance(obj, (int, float)): return str(obj) if isinstance(obj, list): return "[" + ", ".join(_ser(x) for x in obj) + "]" if isinstance(obj, dict): inner = [] for k in sorted(obj.keys()): inner.append(f"{k}: {_ser(obj[k])}") return "{ " + ", ".join(inner) + " }" return '"' + str(obj).replace('"', '\\"') + '"' buf = io.StringIO() buf.write("---\n") for k in sorted(d.keys()): buf.write(f"{k}: {_ser(d[k])}\n") buf.write("---\n") return buf.getvalue() def _reconstruct_body_from_chunks(chunks: List[Dict]) -> str: if not chunks: return "" def _num_from_chunk_id(cid: str) -> int: try: if "#" in cid: return int(cid.split("#", 1)[1]) return 0 except Exception: return 0 chunks_sorted = sorted( chunks, key=lambda c: ( int(c.get("seq", c.get("chunk_index", 0))), int(c.get("chunk_index", 0)), _num_from_chunk_id(str(c.get("chunk_id", ""))), ) ) body = "".join(c.get("text") or "" for c in chunks_sorted) return body def parse_args() -> argparse.Namespace: p = argparse.ArgumentParser(prog="export_markdown.py", description="Exportiert Notes aus Qdrant in Markdown.") p.add_argument("--out", required=True, help="Zielordner") p.add_argument("--prefix", default="", help="Collections-Prefix; überschreibt ENV COLLECTION_PREFIX") p.add_argument("--note-id", default="", help="nur eine Note exportieren") p.add_argument("--overwrite", action="store_true", help="vorhandene Dateien überschreiben") p.add_argument("--include-edges", default="none", choices=["none", "yaml", "footer"], help="Edges im Export anzeigen") return p.parse_args() def main(): args = parse_args() out_root = Path(args.out).resolve() out_root.mkdir(parents=True, exist_ok=True) prefix = args.prefix.strip() or os.environ.get("COLLECTION_PREFIX", "").strip() or "mindnet" cfg = QdrantConfig.from_env(prefix=prefix) client = get_client(cfg) ensure_collections(client, cfg) if args.note_id: notes = fetch_all_notes(client, cfg, only_note_id=args.note_id) else: notes = fetch_all_notes(client, cfg) exported = 0 for n in notes: note_id = n.get("note_id") or n.get("id") if not note_id: continue rel = _normalize_rel_path(str(n.get("path") or f"{note_id}.md")) dst = out_root.joinpath(rel) body = str(n.get("fulltext") or "") if not body: chunks = fetch_chunks_for_note(client, cfg, note_id) body = _reconstruct_body_from_chunks(chunks) fm = {} for k in ("id", "title", "type", "status", "created", "tags", "priority", "due", "effort_min", "values", "goals", "embedding_exclude"): if k in n: fm[k] = n[k] for k in ("hash_signature", "hash_fulltext", "hash_body", "hash_frontmatter"): if k in n: fm[k] = n[k] edges_block = "" if args.include_edges in ("yaml", "footer"): try: edges = fetch_edges_for_note(client, cfg, note_id) or [] if args.include_edges == "yaml": fm["_edges"] = edges else: edges_block = "\n\n---\n_edges_:\n" + json.dumps(edges, ensure_ascii=False, indent=2) + "\n" except Exception: pass if dst.exists() and not args.overwrite: decision = "skip" else: _ensure_parent(dst) content = _yaml_frontmatter(fm) + (body or "") + edges_block dst.write_text(content, encoding="utf-8") decision = "write" print(json.dumps({"note_id": note_id, "path": str(dst), "decision": decision}, ensure_ascii=False)) if decision == "write": exported += 1 print(f"Done. Exported notes: {exported}") if __name__ == "__main__": main()