From a9b4643352aea013d69662b3473a47b545a44619 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Wed, 3 Sep 2025 07:16:09 +0200
Subject: [PATCH] =?UTF-8?q?scripts/preview=5Fchunks.py=20hinzugef=C3=BCgt?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 scripts/preview_chunks.py | 44 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 scripts/preview_chunks.py

diff --git a/scripts/preview_chunks.py b/scripts/preview_chunks.py
new file mode 100644
index 0000000..9046d2a
--- /dev/null
+++ b/scripts/preview_chunks.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+import argparse, os, glob, json
+from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
+from app.core.chunker import assemble_chunks
+from app.core.chunk_payload import make_chunk_payloads
+from app.core.note_payload import make_note_payload
+
+def iter_md(root: str) -> list[str]:  # every *.md path below root (recursive), sorted so runs are reproducible
+    return sorted(glob.glob(os.path.join(root, "**", "*.md"), recursive=True))
+
+def main() -> None:
+    """Print one JSON line of chunk statistics for each valid note found under --vault."""
+    import sys  # function-scope import: the file's import line does not provide sys
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--vault", required=True)
+    ap.add_argument("--note-id", help="Optional: nur eine Note (Frontmatter id) verarbeiten")
+    args = ap.parse_args()
+
+    vault = os.path.abspath(args.vault)
+    for path in iter_md(vault):
+        parsed = read_markdown(path)
+        fm = normalize_frontmatter(parsed.frontmatter)
+        try:
+            validate_required_frontmatter(fm)
+        except Exception as exc:  # still best-effort, but no longer a silent skip
+            print(f"skipping {path}: {exc}", file=sys.stderr)  # stdout stays pure JSON lines
+            continue
+        if args.note_id and fm.get("id") != args.note_id:
+            continue  # --note-id was given and this note is a different one
+
+        # Note payload (for metadata; its "path" is used here), then build the chunks and their payloads.
+        note_pl = make_note_payload(parsed, vault_root=vault)
+        chunks = assemble_chunks(fm["id"], parsed.body, fm.get("type", "concept"))
+        chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks)
+        print(json.dumps({
+            "note_id": fm["id"],
+            "title": fm["title"],
+            "num_chunks": len(chunk_pls),
+            "avg_tokens": round(sum(c["token_count"] for c in chunk_pls)/max(1,len(chunk_pls)), 1),  # max() avoids dividing by zero chunks
+            "chunks": [{"id": c["id"], "tokens": c["token_count"], "section": c.get("section_title"), "prev": c.get("neighbors",{}).get("prev"), "next": c.get("neighbors",{}).get("next")} for c in chunk_pls]
+        }, ensure_ascii=False))
+if __name__ == "__main__":  # run the CLI only when executed as a script, not when imported
+    main()