diff --git a/scripts/dump_note_chunks.py b/scripts/dump_note_chunks.py new file mode 100644 index 0000000..8aba330 --- /dev/null +++ b/scripts/dump_note_chunks.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +from __future__ import annotations +import argparse, os, glob +from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter +from app.core.chunker import assemble_chunks + +def iter_md(root: str): + return [p for p in glob.glob(os.path.join(root, "**", "*.md"), recursive=True)] + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--vault", required=True) + ap.add_argument("--note-id", required=True) + args = ap.parse_args() + + root = os.path.abspath(args.vault) + for path in iter_md(root): + parsed = read_markdown(path) + fm = normalize_frontmatter(parsed.frontmatter) + try: + validate_required_frontmatter(fm) + except Exception: + continue + if fm.get("id") != args.note_id: + continue + + chunks = assemble_chunks(fm["id"], parsed.body, fm.get("type","concept")) + print(f"# {fm['title']} ({fm['id']}) — {len(chunks)} chunks\n") + for ch in chunks: + print(f"--- {ch.id} | {ch.token_count} tok | {ch.section_path} ---") + print(ch.text.strip()) + print() + break + else: + print("Note nicht gefunden oder Frontmatter unvollständig.") + +if __name__ == "__main__": + main()