#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ scripts/payload_dryrun.py (see docstring inside for usage) """ from __future__ import annotations import argparse, os, json, yaml, re from typing import Any, Dict, List, Optional from pathlib import Path from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter from app.core.note_payload import make_note_payload from app.core.chunker import assemble_chunks from app.core.chunk_payload import make_chunk_payloads try: from app.core.derive_edges import build_edges_for_note except Exception: from app.core.edges import build_edges_for_note # type: ignore def _env(n: str, d: Optional[str]=None) -> str: v = os.getenv(n) return v if v is not None else (d or "") def load_types() -> dict: p = _env("MINDNET_TYPES_FILE", "./config/types.yaml") try: with open(p, "r", encoding="utf-8") as f: import yaml return yaml.safe_load(f) or {} except Exception: return {} def _deep_get(root: Any, path: str) -> Any: cur = root for key in path.split("."): if not isinstance(cur, dict) or key not in cur: return None cur = cur[key] return cur def eff_chunk_profile(note_type: str, fm: Dict[str, Any], reg: dict) -> Optional[str]: if isinstance(fm.get("chunk_profile"), str): return fm["chunk_profile"] types = reg.get("types") if isinstance(reg.get("types"), dict) else reg if isinstance(types, dict): tp = types.get(note_type, {}) if isinstance(tp, dict) and isinstance(tp.get("chunk_profile"), str): return tp["chunk_profile"] return None def eff_retriever_weight(note_type: str, fm: Dict[str, Any], reg: dict) -> float: if fm.get("retriever_weight") is not None: try: return float(fm["retriever_weight"]) except Exception: pass types = reg.get("types") if isinstance(reg.get("types"), dict) else reg for path in [f"{note_type}.retriever_weight", f"{note_type}.retriever.weight", f"{note_type}.retrieval.weight", "defaults.retriever_weight", "defaults.retriever.weight", "global.retriever_weight", "global.retriever.weight"]: val = _deep_get(types, path) if "." in path else (types.get(path) if isinstance(types, dict) else None) if val is None and isinstance(reg, dict): val = _deep_get(reg, f"types.{path}") try: v = float(val) return v except Exception: pass return 1.0 def main(): ap = argparse.ArgumentParser() ap.add_argument("--vault", required=True) ap.add_argument("--note-id") ap.add_argument("--with-edges", action="store_true") args = ap.parse_args() reg = load_types() root = os.path.abspath(args.vault) files: List[str] = [] for dp, _, fns in os.walk(root): for fn in fns: if fn.lower().endswith(".md"): files.append(os.path.join(dp, fn)) files.sort() for path in files: parsed = read_markdown(path) if not parsed: continue fm = normalize_frontmatter(parsed.frontmatter) try: validate_required_frontmatter(fm) except Exception as e: print(json.dumps({"path": path, "error": f"invalid frontmatter: {e}"})) continue if args.note_id and fm.get("id") != args.note_id: continue # Note payload note_pl = make_note_payload(parsed, vault_root=root, hash_mode="body", hash_normalize="canonical", hash_source="parsed", file_path=path) note_type = fm.get("type") or "concept" cp = eff_chunk_profile(note_type, fm, reg) rw = eff_retriever_weight(note_type, fm, reg) # Das macht der Importer ebenfalls: explizite Spiegelung in Note-Payload if cp is not None: note_pl["chunk_profile"] = cp note_pl["retriever_weight"] = rw body_text = getattr(parsed, "body", "") or "" chunks = assemble_chunks(fm["id"], body_text, fm.get("type","concept")) chunk_note = { "frontmatter": fm, "id": fm.get("id"), "type": fm.get("type"), "title": fm.get("title"), "path": note_pl.get("path") or path, "note_id": note_pl.get("note_id"), "tags": fm.get("tags"), } chunk_pls = make_chunk_payloads( chunk_note, note_pl["path"], chunks, note_text=body_text, types_cfg=(reg.get("types") if isinstance(reg, dict) and isinstance(reg.get("types"), dict) else reg if isinstance(reg, dict) else {}), file_path=path, ) out = { "note_id": note_pl.get("note_id") or fm.get("id"), "title": fm.get("title"), "type": fm.get("type"), "resolved": {"retriever_weight": rw, "chunk_profile": cp}, "note_payload": {k: note_pl.get(k) for k in ("retriever_weight","chunk_profile")}, "chunks_summary": { "count": len(chunk_pls), "first": [ {k: chunk_pls[i].get(k) for k in ("chunk_id","index","ord","retriever_weight","chunk_profile","neighbors_prev","neighbors_next")} for i in range(min(3, len(chunk_pls))) ] }, "path": note_pl.get("path") } if args.with_edges: edges = build_edges_for_note( note_id=note_pl.get("note_id") or fm.get("id"), chunk_payloads=chunk_pls, note_level_refs=note_pl.get("references") or [], include_note_scope_refs=False, ) kinds = {} for e in edges: k = (e.get("relation") or e.get("kind") or "edge") kinds[k] = kinds.get(k, 0) + 1 out["edges_summary"] = {"total": len(edges), "by_kind": kinds} print(json.dumps(out, ensure_ascii=False)) if __name__ == "__main__": main()