# mindnet/scripts/export_markdown.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Modul: scripts/export_markdown.py
Version: 1.6.1
Datum: 2025-11-07

Zweck
-----
Exportiert Notes aus Qdrant zurück in Markdown-Dateien (verlustarm):
  • Pfade relativieren, Backslashes → Slashes
  • Body aus 'fulltext' (falls vorhanden) oder Rekonstruktion via Chunks (seq/chunk_index)
  • Optional: vorhandene Edges pro Note mit exportieren (--include-edges yaml|footer)

CLI
---
    export COLLECTION_PREFIX="mindnet"
    python3 -m scripts.export_markdown --out ./_exportVault
    python3 -m scripts.export_markdown --out ./_exportVault --note-id <ID>
    python3 -m scripts.export_markdown --out ./_exportVault --overwrite
    python3 -m scripts.export_markdown --out ./_exportVault --include-edges yaml
    python3 -m scripts.export_markdown --out ./_exportVault --include-edges footer

Parameter
---------
--out            Zielwurzel (Ordner wird angelegt)
--prefix         überschreibt ENV COLLECTION_PREFIX (Default: mindnet)
--note-id        nur eine bestimmte Note exportieren
--overwrite      vorhandene Dateien überschreiben
--include-edges  none|yaml|footer  (Default: none)
"""
from __future__ import annotations

import argparse
import os
import json
from pathlib import Path
from typing import Dict, List, Tuple, Optional

from app.core.qdrant import (
    QdrantConfig,
    get_client,
    fetch_all_notes,
    fetch_chunks_for_note,
    fetch_edges_for_note,   # <— jetzt angebunden
    ensure_collections,
)

def _normalize_rel_path(p: str) -> str:
    """Return *p* with forward slashes only and without leading slashes.

    Backslashes are converted to ``/`` first, then every leading ``/`` is
    dropped. A falsy *p* (``None`` or ``""``) yields ``""``.
    """
    forward = (p or "").replace("\\", "/")
    return forward.lstrip("/")

def _ensure_parent(p: Path):
    """Create the directory containing *p*, including any missing ancestors.

    An already existing directory is left untouched; *p* itself is not created.
    """
    parent_dir = p.parent
    parent_dir.mkdir(parents=True, exist_ok=True)

def _yaml_frontmatter(d: Dict) -> str:
    """Render *d* as a YAML front-matter block delimited by ``---`` lines.

    Top-level keys are written in sorted order, one ``key: value`` line each.
    Lists/tuples and dicts are written in YAML flow style (``[a, b]`` and
    ``{ k: v }``, dict keys sorted).

    Strings are written plain only when that is unambiguous. They are
    double-quoted (JSON string syntax, which is valid YAML) when they

      * are empty or have leading/trailing whitespace,
      * contain a YAML indicator character, a backslash, or a
        non-printable character such as a newline,
      * would otherwise be read back as a bool, null, or number.

    Args:
        d: Front-matter mapping. Supported value types are str, bool, None,
            int, float, list/tuple and dict; anything else is stringified
            and quoted.

    Returns:
        The front-matter text including both ``---`` lines, each terminated
        by a newline.
    """
    # Characters that make a plain (unquoted) scalar ambiguous or invalid.
    special_chars = frozenset(":-{}[],#&*!|>'\"%@`\\?")
    # Plain scalars that YAML loaders resolve to bool/null rather than str.
    reserved_words = frozenset(("true", "false", "yes", "no", "on", "off", "null", "~"))

    def _looks_numeric(text: str) -> bool:
        # A string that parses as a number would lose its str type on reload.
        for convert in (float, lambda s: int(s, 0)):
            try:
                convert(text)
            except ValueError:
                continue
            return True
        return False

    def _needs_quotes(text: str) -> bool:
        return (
            not text
            or text != text.strip()
            or not special_chars.isdisjoint(text)
            or not text.isprintable()
            or text.lower() in reserved_words
            or _looks_numeric(text)
        )

    def _quote(text: str) -> str:
        # json.dumps escapes '"', '\\' and control characters; the result is
        # a valid YAML double-quoted scalar.
        return json.dumps(text, ensure_ascii=False)

    def _ser(obj) -> str:
        if isinstance(obj, str):
            return _quote(obj) if _needs_quotes(obj) else obj
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(obj, bool):
            return "true" if obj else "false"
        if obj is None:
            return "null"
        if isinstance(obj, (int, float)):
            return str(obj)
        if isinstance(obj, (list, tuple)):
            return "[" + ", ".join(_ser(item) for item in obj) + "]"
        if isinstance(obj, dict):
            pairs = [f"{_ser(key)}: {_ser(obj[key])}" for key in sorted(obj.keys())]
            return "{ " + ", ".join(pairs) + " }"
        return _quote(str(obj))

    lines = ["---\n"]
    lines.extend(f"{_ser(key)}: {_ser(d[key])}\n" for key in sorted(d.keys()))
    lines.append("---\n")
    return "".join(lines)

def _reconstruct_body_from_chunks(chunks: List[Dict]) -> str:
    """Rebuild a note body by concatenating chunk texts in sorted order.

    Chunks are ordered by ``seq`` (falling back to ``chunk_index`` when
    ``seq`` is missing or not convertible to int), then by ``chunk_index``,
    then by the integer after the first ``#`` in ``chunk_id``. Values that
    are missing, ``None`` or non-numeric count as 0 instead of raising, so a
    single malformed chunk payload does not abort the export.

    Args:
        chunks: Chunk payload dicts; the keys read are ``seq``,
            ``chunk_index``, ``chunk_id`` and ``text``.

    Returns:
        The concatenated ``text`` values (missing/falsy texts contribute
        nothing); ``""`` for an empty chunk list.
    """
    if not chunks:
        return ""

    def _as_int(value):
        # Returns None when the value cannot be turned into an int.
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return None

    def _num_from_chunk_id(cid: str) -> int:
        _, sep, tail = cid.partition("#")
        parsed = _as_int(tail) if sep else None
        return 0 if parsed is None else parsed

    def _sort_key(chunk: Dict):
        index = _as_int(chunk.get("chunk_index"))
        seq = _as_int(chunk.get("seq"))
        if seq is None:
            seq = index
        return (
            seq or 0,
            index or 0,
            _num_from_chunk_id(str(chunk.get("chunk_id", ""))),
        )

    return "".join(c.get("text") or "" for c in sorted(chunks, key=_sort_key))

def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse the exporter's command-line options.

    Args:
        argv: Argument list to parse, without the program name. ``None``
            (the default) lets argparse read ``sys.argv[1:]``, which is what
            a call without arguments has always done.

    Returns:
        Namespace with the attributes ``out``, ``prefix``, ``note_id``,
        ``overwrite`` and ``include_edges``.
    """
    parser = argparse.ArgumentParser(
        prog="export_markdown.py",
        description="Exportiert Notes aus Qdrant in Markdown.",
    )
    parser.add_argument("--out", required=True, help="Zielordner")
    parser.add_argument(
        "--prefix",
        default="",
        help="Collections-Prefix; überschreibt ENV COLLECTION_PREFIX",
    )
    parser.add_argument("--note-id", default="", help="nur eine Note exportieren")
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="vorhandene Dateien überschreiben",
    )
    parser.add_argument(
        "--include-edges",
        default="none",
        choices=["none", "yaml", "footer"],
        help="Edges im Export anzeigen",
    )
    return parser.parse_args(argv)

def main():
    """Export notes from Qdrant as Markdown files below the ``--out`` folder.

    For every note returned by ``fetch_all_notes``:

      * the target path is the note's ``path`` payload (normalized) or
        ``<note_id>.md``; targets that are not located strictly inside the
        output root are skipped,
      * existing files are skipped unless ``--overwrite`` is given (checked
        before any chunk/edge lookup so skipped notes cost no extra calls),
      * the body is the ``fulltext`` payload or, if empty, rebuilt from the
        note's chunks,
      * selected payload keys become YAML front matter,
      * with ``--include-edges`` the note's edges are added to the front
        matter (``yaml``) or appended as a JSON footer (``footer``); a
        failure there is reported on stderr and the note is exported
        without edges.

    One JSON line per processed note goes to stdout, followed by a summary
    line. Diagnostics go to stderr so stdout stays machine-readable.
    """
    import sys  # function-scope import: only needed for stderr diagnostics

    def _report(note_id, dst, decision, **extra):
        record = {"note_id": note_id, "path": str(dst), "decision": decision}
        record.update(extra)
        print(json.dumps(record, ensure_ascii=False))

    args = parse_args()
    out_root = Path(args.out).resolve()
    out_root.mkdir(parents=True, exist_ok=True)

    # Precedence: --prefix, then ENV COLLECTION_PREFIX, then "mindnet".
    prefix = args.prefix.strip() or os.environ.get("COLLECTION_PREFIX", "").strip() or "mindnet"
    cfg = QdrantConfig.from_env(prefix=prefix)
    client = get_client(cfg)
    ensure_collections(client, cfg)

    if args.note_id:
        notes = fetch_all_notes(client, cfg, only_note_id=args.note_id)
    else:
        notes = fetch_all_notes(client, cfg)

    # Payload keys copied verbatim into the front matter when present.
    frontmatter_keys = (
        "id", "title", "type", "status", "created", "tags", "priority", "due",
        "effort_min", "values", "goals", "embedding_exclude",
        "hash_signature", "hash_fulltext", "hash_body", "hash_frontmatter",
    )

    exported = 0
    for n in notes:
        note_id = n.get("note_id") or n.get("id")
        if not note_id:
            continue

        rel = _normalize_rel_path(str(n.get("path") or f"{note_id}.md"))
        dst = out_root.joinpath(rel)

        # Lexical containment check ('..' collapsed, symlinks not followed):
        # the target must be a file path strictly below out_root.
        if out_root not in Path(os.path.normpath(dst)).parents:
            print(
                f"WARN: note {note_id}: target path {rel!r} is not inside {out_root}; skipped",
                file=sys.stderr,
            )
            _report(note_id, dst, "skip", reason="outside-out-root")
            continue

        if dst.exists() and not args.overwrite:
            _report(note_id, dst, "skip")
            continue

        body = str(n.get("fulltext") or "")
        if not body:
            chunks = fetch_chunks_for_note(client, cfg, note_id)
            body = _reconstruct_body_from_chunks(chunks)

        fm = {k: n[k] for k in frontmatter_keys if k in n}

        edges_block = ""
        if args.include_edges in ("yaml", "footer"):
            try:
                edges = fetch_edges_for_note(client, cfg, note_id) or []
                if args.include_edges == "yaml":
                    fm["_edges"] = edges
                else:
                    edges_block = "\n\n---\n_edges_:\n" + json.dumps(edges, ensure_ascii=False, indent=2) + "\n"
            except Exception as exc:
                # Edges are best-effort: export the note without them, but
                # make the failure visible instead of swallowing it.
                print(
                    f"WARN: note {note_id}: edges not exported ({type(exc).__name__}: {exc})",
                    file=sys.stderr,
                )

        _ensure_parent(dst)
        content = _yaml_frontmatter(fm) + (body or "") + edges_block
        dst.write_text(content, encoding="utf-8")
        _report(note_id, dst, "write")
        exported += 1

    print(f"Done. Exported notes: {exported}")

if __name__ == "__main__":
    main()