From 0d6eb4dac44776f961d200f1be9c87810ccb4401 Mon Sep 17 00:00:00 2001 From: Lars Date: Wed, 24 Sep 2025 12:34:53 +0200 Subject: [PATCH] =?UTF-8?q?tests/compare=5Fvaults.py=20hinzugef=C3=BCgt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/compare_vaults.py | 105 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 tests/compare_vaults.py diff --git a/tests/compare_vaults.py b/tests/compare_vaults.py new file mode 100644 index 0000000..6364873 --- /dev/null +++ b/tests/compare_vaults.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Script: tests/compare_vaults.py +Version: 1.0.0 +Datum: 2025-09-10 + +Funktion +-------- +Vergleicht zwei Ordner mit Markdown-Dateien (Vault vs. Export). Fokus: + - body: reiner Body-Text (Whitespace tolerant) + - frontmatter: YAML-Felder selektiv (id, title, type, tags, status, aliases) + - both: erst FM, dann Body + +Aufrufe +------- + python3 tests/compare_vaults.py --src ./test_vault --dst ./_exportVault --focus body + python3 tests/compare_vaults.py --src ./test_vault --dst ./_exportVault --focus both +""" +from __future__ import annotations + +import argparse, os, sys, glob +from typing import Tuple, Dict, Any +import yaml + +def split_md(p: str) -> Tuple[Dict[str, Any], str]: + with open(p, "r", encoding="utf-8") as f: + s = f.read() + if s.startswith("---"): + try: + fm_txt, body = s.split("\n---\n", 1) + fm = yaml.safe_load(fm_txt.strip("- \n")) or {} + except Exception: + fm, body = {}, s + else: + fm, body = {}, s + return fm, body.strip() + +def norm_body(s: str) -> str: + return "\n".join([ln.rstrip() for ln in s.strip().splitlines()]).strip() + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--src", required=True, help="Original-Vault") + ap.add_argument("--dst", required=True, help="Export-Ordner") + ap.add_argument("--focus", choices=["body","frontmatter","both"], default="body") + args = ap.parse_args() + + src = os.path.abspath(args.src) + dst = os.path.abspath(args.dst) + + # Map per Note-ID + by_id = {} + for p in glob.glob(os.path.join(src, "**", "*.md"), recursive=True): + fm, body = split_md(p) + nid = fm.get("id") or os.path.splitext(os.path.basename(p))[0] + by_id.setdefault(nid, {})["src"] = (p, fm, body) + for p in glob.glob(os.path.join(dst, "**", "*.md"), recursive=True): + fm, body = split_md(p) + nid = fm.get("id") or os.path.splitext(os.path.basename(p))[0] + by_id.setdefault(nid, {})["dst"] = (p, fm, body) + + mismatches = 0 + for nid, d in sorted(by_id.items()): + src_t = d.get("src") + dst_t = d.get("dst") + if not src_t or not dst_t: + print({"note_id": nid, "status": "missing", "src": bool(src_t), "dst": bool(dst_t)}) + mismatches += 1 + continue + sp, sfm, sbody = src_t + dp, dfm, dbody = dst_t + + # frontmatter compare (subset) + fm_ok = True + fm_keys = ["id","title","type","status","tags","aliases"] + if args.focus in ("frontmatter","both"): + for k in fm_keys: + if (sfm.get(k) or None) != (dfm.get(k) or None): + fm_ok = False + break + # body compare + body_ok = True + if args.focus in ("body","both"): + if norm_body(sbody) != norm_body(dbody): + body_ok = False + + if not (fm_ok and body_ok): + mismatches += 1 + print({ + "note_id": nid, + "frontmatter_equal": fm_ok, + "body_equal": body_ok, + "src_path": sp, + "dst_path": dp + }) + + if mismatches: + print({"summary": "DIFFS", "count": mismatches}) + sys.exit(1) + else: + print({"summary": "OK", "count": 0}) + +if __name__ == "__main__": + main()