All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 2s
181 lines
6.1 KiB
Python
181 lines
6.1 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
Module: scripts/export_markdown.py
Version: 1.6.1
Date: 2025-11-07

Purpose
-------
Exports notes from Qdrant back into Markdown files (low-loss):
  • Make paths relative, backslashes → slashes
  • Body from 'fulltext' (if present) or reconstructed from chunks (seq/chunk_index)
  • Optional: also export the existing edges of each note (--include-edges yaml|footer)

CLI
---
  export COLLECTION_PREFIX="mindnet"
  python3 -m scripts.export_markdown --out ./_exportVault
  python3 -m scripts.export_markdown --out ./_exportVault --note-id <ID>
  python3 -m scripts.export_markdown --out ./_exportVault --overwrite
  python3 -m scripts.export_markdown --out ./_exportVault --include-edges yaml
  python3 -m scripts.export_markdown --out ./_exportVault --include-edges footer

Parameters
----------
  --out            target root (the directory is created)
  --prefix         overrides ENV COLLECTION_PREFIX (default: mindnet)
  --note-id        export only one specific note
  --overwrite      overwrite existing files
  --include-edges  none|yaml|footer (default: none)
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import os
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Dict, List, Tuple, Optional
|
|
|
|
from app.core.qdrant import (
|
|
QdrantConfig,
|
|
get_client,
|
|
fetch_all_notes,
|
|
fetch_chunks_for_note,
|
|
fetch_edges_for_note, # <— jetzt angebunden
|
|
ensure_collections,
|
|
)
|
|
|
|
def _normalize_rel_path(p: str) -> str:
    """Return *p* as a relative path that uses forward slashes only.

    Every backslash is replaced by ``/`` and all leading ``/`` characters are
    removed. A falsy *p* (``None`` or ``""``) yields the empty string.
    """
    with_slashes = (p or "").replace("\\", "/")
    return with_slashes.lstrip("/")
|
|
|
|
def _ensure_parent(p: Path):
    """Create the directory that contains *p*, including any missing ancestors.

    An already existing directory is not an error; *p* itself is not created.
    """
    containing_dir = p.parent
    containing_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
def _yaml_frontmatter(d: Dict) -> str:
    """Render *d* as a YAML front-matter block delimited by ``---`` lines.

    Top-level keys are written in sorted order, one ``key: value`` pair per
    line. Values are serialised as follows:

    * ``str``   -> plain scalar, or a double-quoted scalar when quoting is needed
    * ``bool``  -> ``true`` / ``false``
    * ``None``  -> ``null``
    * ``int`` / ``float`` -> ``str(value)``
    * ``list``  -> flow sequence ``[a, b]``
    * ``dict``  -> flow mapping ``{ k: v, ... }`` with sorted keys
    * anything else -> ``str(value)`` as a double-quoted scalar

    A string is double-quoted when it contains a YAML indicator character, a
    backslash or a control character, when it is empty or has leading/trailing
    whitespace, or when a YAML parser would otherwise read it back as a
    boolean, null or number (e.g. ``"true"``, ``"42"``). Inside double quotes,
    backslashes, double quotes and control characters are escaped so the
    output stays valid YAML.

    Returns:
        The front-matter text, including both ``---`` delimiter lines and a
        trailing newline.
    """
    import re

    # Characters with syntactic meaning in YAML; the backslash is included
    # because it is only safe to emit inside an escaped double-quoted scalar.
    special = frozenset(":-{}[],#&*!|>'\"%@`\\")
    # Plain scalars that a YAML parser resolves to bool/null/float, not str.
    reserved = {"", "~", "null", "true", "false", "yes", "no", "on", "off", ".nan", ".inf"}
    looks_numeric = re.compile(
        r"[+]?(?:0x[0-9a-fA-F]+|0o[0-7]+|(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)"
    )
    escapes = {"\\": "\\\\", '"': '\\"', "\n": "\\n", "\r": "\\r", "\t": "\\t"}

    def _needs_quotes(text: str) -> bool:
        return (
            text != text.strip()
            or text.lower() in reserved
            or looks_numeric.fullmatch(text) is not None
            or any(ch in special or ch < " " or ch == "\x7f" for ch in text)
        )

    def _quote(text: str) -> str:
        pieces = []
        for ch in text:
            if ch in escapes:
                pieces.append(escapes[ch])
            elif ch < " " or ch == "\x7f":
                # Remaining control characters use YAML's \xNN escape.
                pieces.append(f"\\x{ord(ch):02x}")
            else:
                pieces.append(ch)
        return '"' + "".join(pieces) + '"'

    def _key(k) -> str:
        # String keys follow the same quoting rules as string values.
        return _ser(k) if isinstance(k, str) else str(k)

    def _ser(obj) -> str:
        if isinstance(obj, str):
            return _quote(obj) if _needs_quotes(obj) else obj
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(obj, bool):
            return "true" if obj else "false"
        if obj is None:
            return "null"
        if isinstance(obj, (int, float)):
            return str(obj)
        if isinstance(obj, list):
            return "[" + ", ".join(_ser(x) for x in obj) + "]"
        if isinstance(obj, dict):
            inner = [f"{_key(k)}: {_ser(obj[k])}" for k in sorted(obj)]
            return "{ " + ", ".join(inner) + " }"
        return _quote(str(obj))

    lines = ["---\n"]
    lines.extend(f"{_key(k)}: {_ser(d[k])}\n" for k in sorted(d))
    lines.append("---\n")
    return "".join(lines)
|
|
|
|
def _reconstruct_body_from_chunks(chunks: List[Dict]) -> str:
    """Rebuild a note body by concatenating chunk texts in sorted order.

    Chunks are sorted by a three-part key:

    1. ``seq`` (falling back to ``chunk_index`` when ``seq`` is missing or ``None``)
    2. ``chunk_index``
    3. the integer after the first ``#`` in ``chunk_id``

    Any key part that is missing, ``None`` or not convertible to ``int``
    counts as ``0`` instead of raising, so a single malformed payload cannot
    abort the export. The sort is stable, so chunks with equal keys keep
    their input order.

    Args:
        chunks: Chunk payload dicts; each may carry ``seq``, ``chunk_index``,
            ``chunk_id`` and ``text``.

    Returns:
        The chunk texts joined without a separator; ``""`` for no chunks.
        A missing or falsy ``text`` contributes nothing.
    """
    if not chunks:
        return ""

    def _as_int(value) -> int:
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return 0

    def _num_from_chunk_id(cid: str) -> int:
        _, sep, suffix = cid.partition("#")
        return _as_int(suffix) if sep else 0

    def _order(chunk: Dict):
        seq = chunk.get("seq")
        index = chunk.get("chunk_index")
        return (
            _as_int(index if seq is None else seq),
            _as_int(index),
            _num_from_chunk_id(str(chunk.get("chunk_id", ""))),
        )

    return "".join(c.get("text") or "" for c in sorted(chunks, key=_order))
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Parse the command line of the exporter and return the resulting namespace.

    Recognised options: ``--out`` (required), ``--prefix``, ``--note-id``,
    ``--overwrite`` (flag) and ``--include-edges`` (``none``/``yaml``/``footer``).
    """
    parser = argparse.ArgumentParser(
        prog="export_markdown.py",
        description="Exportiert Notes aus Qdrant in Markdown.",
    )
    # One (flag, keyword-arguments) entry per option, registered in this order.
    option_table = (
        ("--out", {"required": True, "help": "Zielordner"}),
        ("--prefix", {"default": "", "help": "Collections-Prefix; überschreibt ENV COLLECTION_PREFIX"}),
        ("--note-id", {"default": "", "help": "nur eine Note exportieren"}),
        ("--overwrite", {"action": "store_true", "help": "vorhandene Dateien überschreiben"}),
        (
            "--include-edges",
            {"default": "none", "choices": ["none", "yaml", "footer"], "help": "Edges im Export anzeigen"},
        ),
    )
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    return parser.parse_args()
|
|
|
|
def main():
    """Export notes from Qdrant into Markdown files below the ``--out`` directory.

    For every note returned by ``fetch_all_notes``:

    * the target path is derived from the note's ``path`` payload, falling
      back to ``<note_id>.md``;
    * the body is taken from ``fulltext`` or, if that is empty, rebuilt from
      the note's chunks;
    * a fixed set of payload keys is written as YAML front matter;
    * with ``--include-edges yaml|footer`` the note's edges are added to the
      front matter (key ``_edges``) or appended as a JSON footer.

    One JSON line (``note_id``, ``path``, ``decision``) is printed to stdout
    per processed note, followed by a summary line. Existing files are left
    untouched unless ``--overwrite`` is given. Notes whose path would resolve
    to the output root itself or to a location outside of it are skipped and
    reported on stderr. A failure while fetching or rendering edges is
    reported on stderr and the note is exported without edges.
    """
    import sys  # function-scope import: only used for diagnostics on stderr

    args = parse_args()
    out_root = Path(args.out).resolve()
    out_root.mkdir(parents=True, exist_ok=True)

    # Precedence: --prefix, then ENV COLLECTION_PREFIX, then the default "mindnet".
    prefix = args.prefix.strip() or os.environ.get("COLLECTION_PREFIX", "").strip() or "mindnet"
    cfg = QdrantConfig.from_env(prefix=prefix)
    client = get_client(cfg)
    ensure_collections(client, cfg)

    if args.note_id:
        notes = fetch_all_notes(client, cfg, only_note_id=args.note_id)
    else:
        notes = fetch_all_notes(client, cfg)

    # Payload keys copied verbatim into the front matter when present.
    frontmatter_keys = (
        "id", "title", "type", "status", "created", "tags", "priority", "due",
        "effort_min", "values", "goals", "embedding_exclude",
        "hash_signature", "hash_fulltext", "hash_body", "hash_frontmatter",
    )

    exported = 0
    for n in notes:
        note_id = n.get("note_id") or n.get("id")
        if not note_id:
            continue

        rel = _normalize_rel_path(str(n.get("path") or f"{note_id}.md"))
        # Collapse ".." lexically (without following symlinks) so that the
        # containment check below sees the location that would be written.
        dst = Path(os.path.normpath(out_root.joinpath(rel)))
        try:
            dst.relative_to(out_root)
            inside_root = dst != out_root
        except ValueError:
            inside_root = False
        if not inside_root:
            # A stored path such as "../x.md", "C:/x.md" or "/" must never
            # make the exporter write outside (or onto) the output root.
            print(
                f"WARN: note {note_id}: path {rel!r} does not resolve to a file below {out_root}; skipped",
                file=sys.stderr,
            )
            print(json.dumps(
                {"note_id": note_id, "path": str(dst), "decision": "skip", "reason": "path-outside-out-root"},
                ensure_ascii=False,
            ))
            continue

        # Decide before touching Qdrant again: a skipped note needs neither
        # its chunks nor its edges.
        if dst.exists() and not args.overwrite:
            print(json.dumps({"note_id": note_id, "path": str(dst), "decision": "skip"}, ensure_ascii=False))
            continue

        body = str(n.get("fulltext") or "")
        if not body:
            chunks = fetch_chunks_for_note(client, cfg, note_id)
            body = _reconstruct_body_from_chunks(chunks)

        fm = {k: n[k] for k in frontmatter_keys if k in n}

        edges_block = ""
        if args.include_edges in ("yaml", "footer"):
            try:
                edges = fetch_edges_for_note(client, cfg, note_id) or []
                if args.include_edges == "yaml":
                    fm["_edges"] = edges
                else:
                    edges_block = "\n\n---\n_edges_:\n" + json.dumps(edges, ensure_ascii=False, indent=2) + "\n"
            except Exception as exc:
                # Edge export is best-effort: keep exporting the note, but do
                # not hide the failure from the operator.
                print(f"WARN: note {note_id}: edges not exported ({exc})", file=sys.stderr)

        _ensure_parent(dst)
        content = _yaml_frontmatter(fm) + (body or "") + edges_block
        dst.write_text(content, encoding="utf-8")

        print(json.dumps({"note_id": note_id, "path": str(dst), "decision": "write"}, ensure_ascii=False))
        exported += 1

    print(f"Done. Exported notes: {exported}")
|
|
|
|
# Script entry point: run the exporter only when this module is executed
# directly (e.g. ``python3 -m scripts.export_markdown``), not when imported.
if __name__ == "__main__":
    main()
|