From a97f757e3461994b825646cec40d2222309ebd13 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Sun, 16 Nov 2025 20:58:27 +0100
Subject: [PATCH] Dateien nach "scripts" hochladen

---
 scripts/payload_dryrun.py | 165 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 165 insertions(+)
 create mode 100644 scripts/payload_dryrun.py

diff --git a/scripts/payload_dryrun.py b/scripts/payload_dryrun.py
new file mode 100644
index 0000000..9ad6e31
--- /dev/null
+++ b/scripts/payload_dryrun.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+scripts/payload_dryrun.py
+Dry run: prints note/chunk payload summaries for a vault as JSON lines (--vault DIR [--note-id ID] [--with-edges]).
+"""
+from __future__ import annotations
+import argparse, os, json, yaml, re
+from typing import Any, Dict, List, Optional
+from pathlib import Path
+
+from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
+from app.core.note_payload import make_note_payload
+from app.core.chunker import assemble_chunks
+from app.core.chunk_payload import make_chunk_payloads
+try:
+    from app.core.derive_edges import build_edges_for_note
+except Exception:
+    from app.core.edges import build_edges_for_note  # type: ignore
+
+def _env(n: str, d: Optional[str] = None) -> str:
+    """Return environment variable ``n`` when set (even if empty); otherwise ``d``, or "" if ``d`` is None/empty."""
+    return os.environ.get(n, d or "")
+
+def load_types() -> dict:
+    """Load the type registry from $MINDNET_TYPES_FILE (default ./config/types.yaml); return {} if unusable."""
+    try:
+        with open(_env("MINDNET_TYPES_FILE", "./config/types.yaml"), "r", encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+    except (OSError, ValueError, yaml.YAMLError):
+        return {}  # best effort: a missing, unreadable or malformed registry means "no overrides"
+    return data if isinstance(data, dict) else {}  # callers call .get() on the result, so never return a list/scalar
+
+def _deep_get(root: Any, path: str) -> Any:
+    """Follow dotted ``path`` through nested dicts starting at ``root``; None if a step is missing."""
+    for part in path.split("."):
+        if not (isinstance(root, dict) and part in root):
+            return None  # reached a non-dict value or an absent key
+        root = root[part]
+    return root
+
+def eff_chunk_profile(note_type: str, fm: Dict[str, Any], reg: dict) -> Optional[str]:
+    """Resolve the chunk profile: frontmatter ``chunk_profile`` first, then the registry entry for ``note_type``."""
+    own = fm.get("chunk_profile")
+    if isinstance(own, str):
+        return own
+    nested = reg.get("types") if isinstance(reg, dict) else None  # tolerate a non-dict registry (e.g. a YAML list)
+    table = nested if isinstance(nested, dict) else reg  # registry may be flat or nested under "types"
+    entry = table.get(note_type) if isinstance(table, dict) else None
+    return entry["chunk_profile"] if isinstance(entry, dict) and isinstance(entry.get("chunk_profile"), str) else None
+
+def eff_retriever_weight(note_type: str, fm: Dict[str, Any], reg: dict) -> float:
+    """Resolve the retriever weight: frontmatter, then the type entry, then defaults/global; 1.0 if nothing parses."""
+    try:
+        return float(fm.get("retriever_weight"))
+    except (TypeError, ValueError, OverflowError):
+        pass  # absent (None) or non-numeric frontmatter value: fall back to the registry
+    nested = reg.get("types") if isinstance(reg, dict) else None  # tolerate a non-dict registry (e.g. a YAML list)
+    table = nested if isinstance(nested, dict) else reg  # registry may be flat or nested under "types"
+    paths = [f"{note_type}.{k}" for k in ("retriever_weight", "retriever.weight", "retrieval.weight")]
+    paths += [f"{scope}.{k}" for scope in ("defaults", "global") for k in ("retriever_weight", "retriever.weight")]
+    for path in (paths if isinstance(table, dict) else ()):
+        try:
+            return float(_deep_get(table, path))
+        except (TypeError, ValueError, OverflowError):
+            continue  # missing (None) or non-numeric: try the next path
+    return 1.0
+
+def main():
+    """Dry-run the payload pipeline and print one JSON line per note.
+
+    Walks ``--vault`` for ``*.md`` files in sorted order and, per note, prints the resolved
+    ``retriever_weight`` / ``chunk_profile`` plus a summary of the first three chunk payloads.
+    ``--note-id`` limits the summaries to one id; ``--with-edges`` adds edge counts by kind.
+    Notes whose frontmatter fails validation are reported as an ``error`` line instead.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--vault", required=True)
+    ap.add_argument("--note-id")
+    ap.add_argument("--with-edges", action="store_true")
+    args = ap.parse_args()
+
+    reg = load_types()
+    root = os.path.abspath(args.vault)
+    # Chunk payloads get the per-type table: the "types" mapping when present, else the whole registry.
+    nested = reg.get("types") if isinstance(reg, dict) else None
+    types_cfg = nested if isinstance(nested, dict) else (reg if isinstance(reg, dict) else {})
+
+    files: List[str] = sorted(os.path.join(dp, fn) for dp, _, fns in os.walk(root) for fn in fns if fn.lower().endswith(".md"))
+
+    for path in files:
+        parsed = read_markdown(path)
+        if not parsed:
+            continue
+        fm = normalize_frontmatter(parsed.frontmatter)
+        try:
+            validate_required_frontmatter(fm)
+        except Exception as e:
+            # ensure_ascii=False keeps non-ASCII paths readable, like the summary line printed below.
+            print(json.dumps({"path": path, "error": f"invalid frontmatter: {e}"}, ensure_ascii=False))
+            continue
+
+        if args.note_id and fm.get("id") != args.note_id:
+            continue
+
+        note_pl = make_note_payload(parsed, vault_root=root, hash_mode="body", hash_normalize="canonical", hash_source="parsed", file_path=path)
+        # The payload may lack "path"; fall back to the file path instead of raising KeyError.
+        note_path = note_pl.get("path") or path
+
+        note_type = fm.get("type") or "concept"
+        cp = eff_chunk_profile(note_type, fm, reg)
+        rw = eff_retriever_weight(note_type, fm, reg)
+
+        # The importer does this as well: explicit mirroring of the resolved values into the note payload.
+        if cp is not None:
+            note_pl["chunk_profile"] = cp
+        note_pl["retriever_weight"] = rw
+
+        body_text = getattr(parsed, "body", "") or ""
+        # NOTE(review): passes the raw "type" (may be None/empty), unlike note_type above; confirm against the importer.
+        chunks = assemble_chunks(fm["id"], body_text, fm.get("type","concept"))
+
+        chunk_note = {
+            "frontmatter": fm,
+            "id": fm.get("id"),
+            "type": fm.get("type"),
+            "title": fm.get("title"),
+            "path": note_path,
+            "note_id": note_pl.get("note_id"),
+            "tags": fm.get("tags"),
+        }
+        chunk_pls = make_chunk_payloads(chunk_note, note_path, chunks, note_text=body_text, types_cfg=types_cfg, file_path=path)
+
+        note_id = note_pl.get("note_id") or fm.get("id")
+        summary_keys = ("chunk_id","index","ord","retriever_weight","chunk_profile","neighbors_prev","neighbors_next")
+        out = {
+            "note_id": note_id,
+            "title": fm.get("title"),
+            "type": fm.get("type"),
+            "resolved": {"retriever_weight": rw, "chunk_profile": cp},
+            "note_payload": {k: note_pl.get(k) for k in ("retriever_weight","chunk_profile")},
+            "chunks_summary": {
+                "count": len(chunk_pls),
+                "first": [{k: pl.get(k) for k in summary_keys} for pl in chunk_pls[:3]],
+            },
+            "path": note_pl.get("path")
+        }
+
+        if args.with_edges:
+            edges = build_edges_for_note(
+                note_id=note_id,
+                chunk_payloads=chunk_pls,
+                note_level_refs=note_pl.get("references") or [],
+                include_note_scope_refs=False,
+            )
+            kinds: Dict[str, int] = {}
+            for e in edges:
+                k = (e.get("relation") or e.get("kind") or "edge")
+                kinds[k] = kinds.get(k, 0) + 1
+            out["edges_summary"] = {"total": len(edges), "by_kind": kinds}
+
+        print(json.dumps(out, ensure_ascii=False))
+
+if __name__ == "__main__":  # run the dry run only when executed as a script, not on import
+    main()