scripts/payload_dryrun.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s

This commit is contained in:
Lars 2025-11-16 21:25:08 +01:00
parent 22d08afe2d
commit 3d74eff224

View File

@ -1,13 +1,13 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
scripts/payload_dryrun.py scripts/payload_dryrun.py (zeigt, was VOR dem Upsert tatsächlich in den Payloads steht)
(see docstring inside for usage) - KEIN Überschreiben der Note-Payload mehr
- types.yaml ist maßgeblich (gemäß app/core/note_payload.py & chunk_payload.py)
""" """
from __future__ import annotations from __future__ import annotations
import argparse, os, json, yaml, re import argparse, os, json
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from pathlib import Path
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
from app.core.note_payload import make_note_payload from app.core.note_payload import make_note_payload
@ -18,54 +18,6 @@ try:
except Exception: except Exception:
from app.core.edges import build_edges_for_note # type: ignore from app.core.edges import build_edges_for_note # type: ignore
def _env(n: str, d: Optional[str]=None) -> str:
    """Return environment variable *n*, or the fallback *d* (else "") when unset.

    A variable that is set to the empty string counts as set and is returned
    unchanged; the fallback only applies when the variable is absent.
    """
    value = os.environ.get(n)
    if value is None:
        return d or ""
    return value
def load_types() -> dict:
    """Load the type registry from the YAML file named by MINDNET_TYPES_FILE.

    The path defaults to "./config/types.yaml". Loading is best-effort: any
    exception raised while opening the file, importing yaml or parsing the
    document results in an empty dict, as does an empty document.
    """
    types_path = _env("MINDNET_TYPES_FILE", "./config/types.yaml")
    try:
        with open(types_path, "r", encoding="utf-8") as handle:
            import yaml
            loaded = yaml.safe_load(handle)
            return loaded or {}
    except Exception:
        # Deliberate fallback: the caller continues with an empty registry.
        return {}
def _deep_get(root: Any, path: str) -> Any:
    """Follow a dot-separated *path* through nested dicts and return the value.

    Returns None as soon as a step lands on a non-dict or on a missing key.
    """
    node = root
    for segment in path.split("."):
        if isinstance(node, dict) and segment in node:
            node = node[segment]
            continue
        return None
    return node
def eff_chunk_profile(note_type: str, fm: Dict[str, Any], reg: dict) -> Optional[str]:
    """Resolve the effective chunk profile for a note.

    A string ``chunk_profile`` in the frontmatter *fm* wins. Otherwise the
    entry for *note_type* is looked up in ``reg["types"]`` when that is a
    dict, or in *reg* itself when it is not. Returns None if no string
    profile is found.
    """
    override = fm.get("chunk_profile")
    if isinstance(override, str):
        return override

    nested = reg.get("types")
    registry = nested if isinstance(nested, dict) else reg
    if not isinstance(registry, dict):
        return None

    entry = registry.get(note_type, {})
    if not isinstance(entry, dict):
        return None

    profile = entry.get("chunk_profile")
    return profile if isinstance(profile, str) else None
def eff_retriever_weight(note_type: str, fm: Dict[str, Any], reg: dict) -> float:
    """Resolve the effective retriever weight for a note.

    Precedence:
      1. ``retriever_weight`` from the frontmatter *fm*, if it converts to float.
      2. The first registry path (type-specific, then ``defaults.*``, then
         ``global.*``) whose value converts to float.
      3. 1.0 as the final fallback.
    """
    override = fm.get("retriever_weight")
    if override is not None:
        try:
            return float(override)
        except Exception:
            pass  # unusable frontmatter value: fall through to the registry

    nested = reg.get("types")
    types = nested if isinstance(nested, dict) else reg

    candidate_paths = (
        f"{note_type}.retriever_weight",
        f"{note_type}.retriever.weight",
        f"{note_type}.retrieval.weight",
        "defaults.retriever_weight",
        "defaults.retriever.weight",
        "global.retriever_weight",
        "global.retriever.weight",
    )
    for path in candidate_paths:
        if "." in path:
            val = _deep_get(types, path)
        elif isinstance(types, dict):
            val = types.get(path)
        else:
            val = None

        # Second attempt: resolve the same path below the "types" key of reg.
        if val is None and isinstance(reg, dict):
            val = _deep_get(reg, f"types.{path}")

        try:
            return float(val)
        except Exception:
            continue  # missing or non-numeric value: try the next path
    return 1.0
def main(): def main():
ap = argparse.ArgumentParser() ap = argparse.ArgumentParser()
ap.add_argument("--vault", required=True) ap.add_argument("--vault", required=True)
@ -73,7 +25,6 @@ def main():
ap.add_argument("--with-edges", action="store_true") ap.add_argument("--with-edges", action="store_true")
args = ap.parse_args() args = ap.parse_args()
reg = load_types()
root = os.path.abspath(args.vault) root = os.path.abspath(args.vault)
files: List[str] = [] files: List[str] = []
@ -97,17 +48,13 @@ def main():
if args.note_id and fm.get("id") != args.note_id: if args.note_id and fm.get("id") != args.note_id:
continue continue
# Note payload # Note-Payload exakt so, wie der Importer ihn baut (types.yaml maßgeblich)
note_pl = make_note_payload(parsed, vault_root=root, hash_mode="body", hash_normalize="canonical", hash_source="parsed", file_path=path) note_pl = make_note_payload(parsed,
vault_root=root,
note_type = fm.get("type") or "concept" hash_mode="body",
cp = eff_chunk_profile(note_type, fm, reg) hash_normalize="canonical",
rw = eff_retriever_weight(note_type, fm, reg) hash_source="parsed",
file_path=path)
# Das macht der Importer ebenfalls: explizite Spiegelung in Note-Payload
if cp is not None:
note_pl["chunk_profile"] = cp
note_pl["retriever_weight"] = rw
body_text = getattr(parsed, "body", "") or "" body_text = getattr(parsed, "body", "") or ""
chunks = assemble_chunks(fm["id"], body_text, fm.get("type","concept")) chunks = assemble_chunks(fm["id"], body_text, fm.get("type","concept"))
@ -121,12 +68,13 @@ def main():
"note_id": note_pl.get("note_id"), "note_id": note_pl.get("note_id"),
"tags": fm.get("tags"), "tags": fm.get("tags"),
} }
# make_chunk_payloads bestimmt Werte ebenfalls aus types.yaml (Frontmatter wird ignoriert)
chunk_pls = make_chunk_payloads( chunk_pls = make_chunk_payloads(
chunk_note, chunk_note,
note_pl["path"], note_pl["path"],
chunks, chunks,
note_text=body_text, note_text=body_text,
types_cfg=(reg.get("types") if isinstance(reg, dict) and isinstance(reg.get("types"), dict) else reg if isinstance(reg, dict) else {}), types_cfg=None, # Loader aus Datei; kein Override von außen
file_path=path, file_path=path,
) )
@ -134,8 +82,10 @@ def main():
"note_id": note_pl.get("note_id") or fm.get("id"), "note_id": note_pl.get("note_id") or fm.get("id"),
"title": fm.get("title"), "title": fm.get("title"),
"type": fm.get("type"), "type": fm.get("type"),
"resolved": {"retriever_weight": rw, "chunk_profile": cp}, "note_payload": {
"note_payload": {k: note_pl.get(k) for k in ("retriever_weight","chunk_profile")}, "retriever_weight": note_pl.get("retriever_weight"),
"chunk_profile": note_pl.get("chunk_profile")
},
"chunks_summary": { "chunks_summary": {
"count": len(chunk_pls), "count": len(chunk_pls),
"first": [ "first": [