scripts/preview_chunks.py hinzugefügt
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 2s

This commit is contained in:
Lars 2025-09-03 07:16:09 +02:00
parent 8ab93da536
commit a9b4643352

44
scripts/preview_chunks.py Normal file
View File

@@ -0,0 +1,44 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import glob
import json
import os
import sys

from app.core.chunk_payload import make_chunk_payloads
from app.core.chunker import assemble_chunks
from app.core.note_payload import make_note_payload
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter


def iter_md(root: str) -> list[str]:
    """Return the paths of all ``*.md`` files below *root*, sorted.

    The search is recursive: files directly inside *root* and in any nested
    sub-directory are included.

    Args:
        root: Directory to search.

    Returns:
        A lexicographically sorted list of matching paths; empty when nothing
        matches.
    """
    pattern = os.path.join(root, "**", "*.md")
    # glob yields matches in arbitrary (filesystem) order; sorting makes the
    # preview output reproducible from run to run.
    return sorted(glob.glob(pattern, recursive=True))
def main():
    """Print a one-line JSON chunking preview for every valid note in a vault.

    Command-line arguments:
        --vault    Root directory searched for Markdown files (required).
        --note-id  Optional; only the note whose frontmatter ``id`` equals
                   this value is processed.

    For each processed note one JSON object is written to stdout with the keys
    ``note_id``, ``title``, ``num_chunks``, ``avg_tokens`` and ``chunks``.
    Notes whose frontmatter fails validation are skipped; the reason is
    reported on stderr so that stdout contains JSON lines only.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--vault", required=True)
    ap.add_argument("--note-id", help="Optional: nur eine Note (Frontmatter id) verarbeiten")
    args = ap.parse_args()

    vault = os.path.abspath(args.vault)
    for path in iter_md(vault):
        parsed = read_markdown(path)
        fm = normalize_frontmatter(parsed.frontmatter)
        try:
            validate_required_frontmatter(fm)
        except Exception as exc:
            # Best-effort preview: keep going, but say which file was dropped
            # and why instead of hiding the problem.
            print(f"[preview_chunks] skipping {path}: {exc}", file=sys.stderr)
            continue
        if args.note_id and fm.get("id") != args.note_id:
            continue

        # Note payload (used for metadata such as the note path).
        note_pl = make_note_payload(parsed, vault_root=vault)
        # Build the chunks and their payloads.
        chunks = assemble_chunks(fm["id"], parsed.body, fm.get("type", "concept"))
        chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks)

        total_tokens = sum(c["token_count"] for c in chunk_pls)
        summary = {
            "note_id": fm["id"],
            "title": fm["title"],
            "num_chunks": len(chunk_pls),
            # max(1, ...) avoids a ZeroDivisionError for notes without chunks.
            "avg_tokens": round(total_tokens / max(1, len(chunk_pls)), 1),
            "chunks": [_chunk_preview(c) for c in chunk_pls],
        }
        print(json.dumps(summary, ensure_ascii=False))


def _chunk_preview(chunk_pl):
    """Reduce one chunk payload to the fields shown in the preview output."""
    # Tolerate a missing "neighbors" key as well as an explicit None value.
    neighbors = chunk_pl.get("neighbors") or {}
    return {
        "id": chunk_pl["id"],
        "tokens": chunk_pl["token_count"],
        "section": chunk_pl.get("section_title"),
        "prev": neighbors.get("prev"),
        "next": neighbors.get("next"),
    }
# Run the preview only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()