tests/compare_vaults.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 7s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 7s
This commit is contained in:
parent
f52a9face8
commit
e99f1c2ba0
|
|
@ -1,106 +1,177 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""
|
"""
|
||||||
Script: tests/compare_vaults.py
|
tests/compare_vaults.py
|
||||||
Version: 1.0.0
|
|
||||||
Datum: 2025-09-10
|
|
||||||
|
|
||||||
|
Vergleicht zwei Markdown-Vaults rekursiv:
|
||||||
|
- Schlüsselauswahl: id | title | filename | auto (default: auto = id>title>filename)
|
||||||
|
- Fokus: body | frontmatter | all (default: body)
|
||||||
|
- Unicode: NFC-Normalisierung für IDs/Titel/Dateinamen und Text
|
||||||
|
|
||||||
Funktion
|
Beispiele:
|
||||||
--------
|
python3 tests/compare_vaults.py --src ./vault --dst ./_exportVault --focus body
|
||||||
Vergleicht zwei Ordner mit Markdown-Dateien (Vault vs. Export). Fokus:
|
python3 tests/compare_vaults.py --src ./vault --dst ./_exportVault --key id --focus all
|
||||||
- body: reiner Body-Text (Whitespace tolerant)
|
|
||||||
- frontmatter: YAML-Felder selektiv (id, title, type, tags, status, aliases)
|
|
||||||
- both: erst FM, dann Body
|
|
||||||
|
|
||||||
Aufrufe
|
Ausgabe:
|
||||||
-------
|
- Einträge mit 'missing' (nur in src oder nur in dst)
|
||||||
python3 tests/compare_vaults.py --src ./test_vault --dst ./_exportVault --focus body
|
- Einträge mit 'diff' (vorhanden in beiden, aber Abweichungen gem. Fokus)
|
||||||
python3 tests/compare_vaults.py --src ./test_vault --dst ./_exportVault --focus both
|
- Summary-Objekt am Ende
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
import unicodedata
|
||||||
|
from typing import Dict, Tuple, Optional, Any
|
||||||
|
|
||||||
import argparse, os, sys, glob
|
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
||||||
from typing import Tuple, Dict, Any
|
if PROJECT_ROOT not in sys.path:
|
||||||
import yaml
|
sys.path.insert(0, PROJECT_ROOT)
|
||||||
|
|
||||||
def split_md(p: str) -> Tuple[Dict[str, Any], str]:
|
# Wir nutzen denselben Parser wie im Projekt (fehlertolerant)
|
||||||
with open(p, "r", encoding="utf-8") as f:
|
from app.core.parser import read_markdown # erwartet: (frontmatter, body)
|
||||||
s = f.read()
|
|
||||||
if s.startswith("---"):
|
MD_EXTS = {".md", ".markdown"}
|
||||||
|
|
||||||
|
def nfc(s: Optional[str]) -> str:
    """Return *s* in Unicode NFC form.

    ``None`` and the empty string both yield ``""``, so callers can pass
    optional values without checking them first.
    """
    text = s if s else ""
    return unicodedata.normalize("NFC", text)
|
||||||
|
|
||||||
|
def norm_text(txt: Optional[str]) -> str:
    """Canonicalise text so that purely cosmetic differences compare equal.

    Steps, in order:
      1. CRLF and lone CR line endings are converted to LF.
      2. Trailing whitespace is removed from every line.
      3. Leading and trailing whitespace of the whole text is stripped.
      4. The result is NFC-normalised.

    Args:
        txt: Text to normalise. ``None`` is treated like an empty string
            instead of raising ``AttributeError``.

    Returns:
        The normalised text (``""`` for ``None`` or empty input).
    """
    if not txt:
        return ""
    unified = txt.replace("\r\n", "\n").replace("\r", "\n")
    trimmed = "\n".join(line.rstrip() for line in unified.split("\n")).strip()
    # ``trimmed`` is always a str here, so normalise directly.
    return unicodedata.normalize("NFC", trimmed)
|
||||||
|
|
||||||
|
def iter_md_files(root: str):
    """Yield the path of every Markdown file below *root*, in a stable order.

    A file counts as Markdown when its lower-cased extension is in
    ``MD_EXTS``. Directories and file names are visited in sorted order so
    that repeated runs produce the same sequence regardless of the order
    the file system happens to report entries in.

    Args:
        root: Directory to walk recursively.

    Yields:
        ``os.path.join(base, filename)`` for each matching file.
    """
    for base, dirs, files in os.walk(root):
        # Sorting in place controls the order in which os.walk descends.
        dirs.sort()
        for fn in sorted(files):
            if os.path.splitext(fn)[1].lower() in MD_EXTS:
                yield os.path.join(base, fn)
|
||||||
|
|
||||||
|
def _key_text(value: Any) -> str:
    """Return *value* as stripped, NFC-normalised text; ``None`` becomes ``""``."""
    if value is None:
        # A YAML null must not turn into the literal key "None".
        return ""
    return unicodedata.normalize("NFC", str(value).strip())


def _filename_key(path: str) -> str:
    """Return the file name of *path* without its extension as a key."""
    stem = os.path.splitext(os.path.basename(path))[0]
    return _key_text(stem)


def key_from(front: Dict[str, Any], path: str, pref: str) -> str:
    """Return the comparison key for one note.

    Args:
        front: Parsed frontmatter of the note (may be empty).
        path: Path of the note file; used for the filename key.
        pref: One of ``"auto"``, ``"id"``, ``"title"``, ``"filename"``.
            Any other value behaves like ``"auto"``.

    Returns:
        The stripped, NFC-normalised key. For ``"id"`` and ``"title"`` the
        result is ``""`` when the field is missing, null or blank. For
        ``"auto"`` the first non-empty of id, title, filename is used.
    """
    front = front or {}

    if pref in ("id", "title"):
        return _key_text(front.get(pref))
    if pref == "filename":
        return _filename_key(path)

    # auto: id > title > filename
    for field in ("id", "title"):
        candidate = _key_text(front.get(field))
        if candidate:
            return candidate
    return _filename_key(path)
|
||||||
|
|
||||||
|
def load_index(root: str, key_pref: str) -> Dict[str, Tuple[str, Dict[str, Any], str]]:
    """Build an index ``key -> (path, frontmatter, body)`` for a vault.

    Every Markdown file found by ``iter_md_files(root)`` is read with
    ``read_markdown`` and stored under the key computed by ``key_from``.
    Whenever that key is empty, or the file cannot be read or parsed, the
    file is indexed under its file name instead, so no two files end up
    sharing the empty key.

    Args:
        root: Vault directory to index.
        key_pref: Key preference passed on to ``key_from``.

    Returns:
        The index. If two files produce the same key, the one visited
        later replaces the earlier one.
    """
    idx: Dict[str, Tuple[str, Dict[str, Any], str]] = {}
    for p in iter_md_files(root):
        try:
            # read_markdown is expected to return (frontmatter, body).
            fm, body = read_markdown(p)
            fm = fm or {}
            k = key_from(fm, p, key_pref)
        except Exception as e:
            # Best effort: keep the file in the index with empty content,
            # but report the problem on stderr so stdout stays pure JSON.
            print(f"warning: could not parse {p}: {e}", file=sys.stderr)
            fm, body, k = {}, "", ""
        if not k:
            # Fallback: filename
            k = key_from({}, p, "filename")
        idx[k] = (p, fm, body or "")
    return idx
|
||||||
|
|
||||||
def norm_body(s: str) -> str:
|
def _fm_text(value: Any) -> str:
    """Return a frontmatter value as stripped NFC text; ``None`` becomes ``""``."""
    if value is None:
        # A null field must compare equal to a missing one, not to "None".
        return ""
    return nfc(str(value).strip())


def compare_entries(src: Tuple[str, Dict[str, Any], str],
                    dst: Tuple[str, Dict[str, Any], str],
                    focus: str) -> Dict[str, Any]:
    """Compare one note from the source vault with its counterpart.

    Args:
        src: ``(path, frontmatter, body)`` of the source note.
        dst: ``(path, frontmatter, body)`` of the destination note.
        focus: ``"frontmatter"`` compares the ``id`` and ``title`` fields,
            ``"body"`` compares the normalised body text, ``"all"`` does
            both.

    Returns:
        ``{"status": "ok"}`` when nothing differs. Otherwise a dict with
        ``status`` set to ``"diff"``, a ``diffs`` mapping describing each
        difference, and the two file paths as ``src_path`` / ``dst_path``.
    """
    sp, sfm, sbody = src
    dp, dfm, dbody = dst
    sfm = sfm or {}
    dfm = dfm or {}
    sbody = sbody or ""
    dbody = dbody or ""

    diffs: Dict[str, Any] = {}

    if focus in ("frontmatter", "all"):
        # Frontmatter is compared minimally: id and title only.
        for field in ("id", "title"):
            s_val = _fm_text(sfm.get(field))
            d_val = _fm_text(dfm.get(field))
            if s_val != d_val:
                diffs[f"frontmatter.{field}"] = {"src": s_val, "dst": d_val}

    if focus in ("body", "all"):
        if norm_text(sbody) != norm_text(dbody):
            # Only the raw lengths are reported, not the text itself.
            diffs["body"] = {"src_len": len(sbody), "dst_len": len(dbody)}

    res: Dict[str, Any] = {"status": "ok"}
    if diffs:
        res["status"] = "diff"
        res["diffs"] = diffs
        res["src_path"] = sp
        res["dst_path"] = dp
    return res
|
||||||
|
|
||||||
def main():
    """Command-line entry point: compare two vaults and print JSON lines.

    Output on stdout, one JSON object per line:
      * one ``"missing"`` entry for every key present in only one vault
        (source-only keys first, then destination-only keys),
      * one ``"diff"`` entry for every common key whose notes differ
        according to ``--focus``,
      * a final summary object.

    The process exits with status 0 even when differences are found.
    Invalid arguments, including a ``--src`` or ``--dst`` that is not a
    directory, end the program through ``argparse`` error handling.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--src", required=True, help="Quell-Vault-Ordner")
    ap.add_argument("--dst", required=True, help="Export-Vault-Ordner")
    ap.add_argument("--focus", choices=["body", "frontmatter", "all"], default="body")
    ap.add_argument("--key", choices=["auto", "id", "title", "filename"], default="auto",
                    help="Vergleichsschlüssel (default: auto=id>title>filename)")
    args = ap.parse_args()

    # os.walk silently yields nothing for a missing directory, which would
    # make a mistyped path look like an empty vault. Reject it up front.
    for flag, folder in (("--src", args.src), ("--dst", args.dst)):
        if not os.path.isdir(folder):
            ap.error(f"{flag}: not a directory: {folder}")

    src_idx = load_index(args.src, args.key)
    dst_idx = load_index(args.dst, args.key)

    src_keys = set(src_idx)
    dst_keys = set(dst_idx)

    # Notes that exist on one side only.
    missing_count = 0
    for k in sorted(src_keys - dst_keys):
        print(json.dumps({"note_id": k, "status": "missing", "src": True, "dst": False},
                         ensure_ascii=False))
        missing_count += 1
    for k in sorted(dst_keys - src_keys):
        print(json.dumps({"note_id": k, "status": "missing", "src": False, "dst": True},
                         ensure_ascii=False))
        missing_count += 1

    # Notes that exist on both sides: compare according to the focus.
    diff_count = 0
    for k in sorted(src_keys & dst_keys):
        res = compare_entries(src_idx[k], dst_idx[k], args.focus)
        if res["status"] == "diff":
            res["note_id"] = k
            print(json.dumps(res, ensure_ascii=False))
            diff_count += 1

    summary = {
        "summary": "OK" if (missing_count == 0 and diff_count == 0) else "DIFFS",
        "missing_count": missing_count,
        "diff_count": diff_count,
        "total_src": len(src_keys),
        "total_dst": len(dst_keys),
        "focus": args.focus,
        "key": args.key,
    }
    print(json.dumps(summary, ensure_ascii=False))
    # The exit status deliberately stays 0 even for "DIFFS" so that the
    # printed output remains visible in the calling CLI.
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user