mindnet/scripts/payload_dryrun.py
Lars 3d74eff224
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
scripts/payload_dryrun.py aktualisiert
2025-11-16 21:25:08 +01:00

116 lines
4.1 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
scripts/payload_dryrun.py — shows what will actually be in the payloads BEFORE the upsert.
- The note payload is NO LONGER overwritten.
- types.yaml is authoritative (per app/core/note_payload.py & chunk_payload.py).
"""
from __future__ import annotations
import argparse, os, json
from typing import Any, Dict, List, Optional
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
from app.core.note_payload import make_note_payload
from app.core.chunker import assemble_chunks
from app.core.chunk_payload import make_chunk_payloads
# Prefer the current derive_edges module; fall back to the legacy edges module.
# NOTE(review): the broad `except Exception` (rather than ImportError) appears
# deliberate, so that errors raised *inside* derive_edges at import time also
# trigger the fallback — confirm before narrowing.
try:
    from app.core.derive_edges import build_edges_for_note
except Exception:
    from app.core.edges import build_edges_for_note  # type: ignore
def _collect_markdown_files(root: str) -> List[str]:
    """Return every *.md file under *root* (recursive), sorted for deterministic output."""
    files: List[str] = []
    for dirpath, _dirs, filenames in os.walk(root):
        files.extend(
            os.path.join(dirpath, fn)
            for fn in filenames
            if fn.lower().endswith(".md")
        )
    files.sort()
    return files


def _edges_summary(note_id: Any, chunk_pls: List[Dict[str, Any]],
                   note_refs: List[Any]) -> Dict[str, Any]:
    """Build the per-note edge summary: total edge count plus a histogram by relation kind."""
    edges = build_edges_for_note(
        note_id=note_id,
        chunk_payloads=chunk_pls,
        note_level_refs=note_refs,
        include_note_scope_refs=False,
    )
    kinds: Dict[str, int] = {}
    for edge in edges:
        kind = edge.get("relation") or edge.get("kind") or "edge"
        kinds[kind] = kinds.get(kind, 0) + 1
    return {"total": len(edges), "by_kind": kinds}


def main() -> None:
    """Print, as one JSON object per note, exactly what would be upserted.

    Payloads are built with the same helpers the importer uses, so
    types.yaml stays authoritative; note frontmatter is never overwritten.

    CLI flags:
        --vault       (required) root directory of the vault to scan
        --note-id     restrict output to the note with this frontmatter id
        --with-edges  additionally emit an edge summary per note
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--vault", required=True)
    ap.add_argument("--note-id")
    ap.add_argument("--with-edges", action="store_true")
    args = ap.parse_args()

    root = os.path.abspath(args.vault)
    for path in _collect_markdown_files(root):
        parsed = read_markdown(path)
        if not parsed:
            continue
        fm = normalize_frontmatter(parsed.frontmatter)
        try:
            validate_required_frontmatter(fm)
        except Exception as e:
            # Report invalid notes as JSON too, so the output stays machine-readable.
            print(json.dumps({"path": path, "error": f"invalid frontmatter: {e}"}))
            continue
        if args.note_id and fm.get("id") != args.note_id:
            continue

        # Note payload exactly as the importer builds it (types.yaml authoritative).
        note_pl = make_note_payload(
            parsed,
            vault_root=root,
            hash_mode="body",
            hash_normalize="canonical",
            hash_source="parsed",
            file_path=path,
        )
        body_text = getattr(parsed, "body", "") or ""
        chunks = assemble_chunks(fm["id"], body_text, fm.get("type", "concept"))
        note_path = note_pl.get("path") or path  # fix: no KeyError if "path" is absent
        chunk_note = {
            "frontmatter": fm,
            "id": fm.get("id"),
            "type": fm.get("type"),
            "title": fm.get("title"),
            "path": note_path,
            "note_id": note_pl.get("note_id"),
            "tags": fm.get("tags"),
        }
        # make_chunk_payloads likewise derives its values from types.yaml
        # (frontmatter overrides are ignored).
        chunk_pls = make_chunk_payloads(
            chunk_note,
            note_path,
            chunks,
            note_text=body_text,
            types_cfg=None,  # loader reads from file; no external override
            file_path=path,
        )

        summary_keys = ("chunk_id", "index", "ord", "retriever_weight",
                        "chunk_profile", "neighbors_prev", "neighbors_next")
        out: Dict[str, Any] = {
            "note_id": note_pl.get("note_id") or fm.get("id"),
            "title": fm.get("title"),
            "type": fm.get("type"),
            "note_payload": {
                "retriever_weight": note_pl.get("retriever_weight"),
                "chunk_profile": note_pl.get("chunk_profile"),
            },
            "chunks_summary": {
                "count": len(chunk_pls),
                # Show only the first three chunks to keep the output compact.
                "first": [
                    {k: cp.get(k) for k in summary_keys}
                    for cp in chunk_pls[:3]
                ],
            },
            "path": note_pl.get("path"),
        }
        if args.with_edges:
            out["edges_summary"] = _edges_summary(
                out["note_id"],
                chunk_pls,
                note_pl.get("references") or [],
            )
        print(json.dumps(out, ensure_ascii=False))


if __name__ == "__main__":
    main()