diff --git a/scripts/payload_dryrun.py b/scripts/payload_dryrun.py index 9ad6e31..ce3980a 100644 --- a/scripts/payload_dryrun.py +++ b/scripts/payload_dryrun.py @@ -1,13 +1,13 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -scripts/payload_dryrun.py -(see docstring inside for usage) +scripts/payload_dryrun.py (zeigt, was VOR dem Upsert tatsächlich in den Payloads steht) +- KEIN Überschreiben der Note-Payload mehr +- types.yaml ist maßgeblich (gemäß app/core/note_payload.py & chunk_payload.py) """ from __future__ import annotations -import argparse, os, json, yaml, re +import argparse, os, json from typing import Any, Dict, List, Optional -from pathlib import Path from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter from app.core.note_payload import make_note_payload @@ -18,54 +18,6 @@ try: except Exception: from app.core.edges import build_edges_for_note # type: ignore -def _env(n: str, d: Optional[str]=None) -> str: - v = os.getenv(n) - return v if v is not None else (d or "") - -def load_types() -> dict: - p = _env("MINDNET_TYPES_FILE", "./config/types.yaml") - try: - with open(p, "r", encoding="utf-8") as f: - import yaml - return yaml.safe_load(f) or {} - except Exception: - return {} - -def _deep_get(root: Any, path: str) -> Any: - cur = root - for key in path.split("."): - if not isinstance(cur, dict) or key not in cur: - return None - cur = cur[key] - return cur - -def eff_chunk_profile(note_type: str, fm: Dict[str, Any], reg: dict) -> Optional[str]: - if isinstance(fm.get("chunk_profile"), str): - return fm["chunk_profile"] - types = reg.get("types") if isinstance(reg.get("types"), dict) else reg - if isinstance(types, dict): - tp = types.get(note_type, {}) - if isinstance(tp, dict) and isinstance(tp.get("chunk_profile"), str): - return tp["chunk_profile"] - return None - -def eff_retriever_weight(note_type: str, fm: Dict[str, Any], reg: dict) -> float: - if fm.get("retriever_weight") is not None: - try: return float(fm["retriever_weight"]) - except Exception: pass - types = reg.get("types") if isinstance(reg.get("types"), dict) else reg - for path in [f"{note_type}.retriever_weight", f"{note_type}.retriever.weight", f"{note_type}.retrieval.weight", - "defaults.retriever_weight", "defaults.retriever.weight", "global.retriever_weight", "global.retriever.weight"]: - val = _deep_get(types, path) if "." in path else (types.get(path) if isinstance(types, dict) else None) - if val is None and isinstance(reg, dict): - val = _deep_get(reg, f"types.{path}") - try: - v = float(val) - return v - except Exception: - pass - return 1.0 - def main(): ap = argparse.ArgumentParser() ap.add_argument("--vault", required=True) @@ -73,7 +25,6 @@ def main(): ap.add_argument("--with-edges", action="store_true") args = ap.parse_args() - reg = load_types() root = os.path.abspath(args.vault) files: List[str] = [] @@ -97,17 +48,13 @@ def main(): if args.note_id and fm.get("id") != args.note_id: continue - # Note payload - note_pl = make_note_payload(parsed, vault_root=root, hash_mode="body", hash_normalize="canonical", hash_source="parsed", file_path=path) - - note_type = fm.get("type") or "concept" - cp = eff_chunk_profile(note_type, fm, reg) - rw = eff_retriever_weight(note_type, fm, reg) - - # Das macht der Importer ebenfalls: explizite Spiegelung in Note-Payload - if cp is not None: - note_pl["chunk_profile"] = cp - note_pl["retriever_weight"] = rw + # Note-Payload exakt so, wie der Importer ihn baut (types.yaml maßgeblich) + note_pl = make_note_payload(parsed, + vault_root=root, + hash_mode="body", + hash_normalize="canonical", + hash_source="parsed", + file_path=path) body_text = getattr(parsed, "body", "") or "" chunks = assemble_chunks(fm["id"], body_text, fm.get("type","concept")) @@ -121,12 +68,13 @@ def main(): "note_id": note_pl.get("note_id"), "tags": fm.get("tags"), } + # make_chunk_payloads bestimmt Werte ebenfalls aus types.yaml (Frontmatter wird ignoriert) chunk_pls = make_chunk_payloads( chunk_note, note_pl["path"], chunks, note_text=body_text, - types_cfg=(reg.get("types") if isinstance(reg, dict) and isinstance(reg.get("types"), dict) else reg if isinstance(reg, dict) else {}), + types_cfg=None, # Loader aus Datei; kein Override von außen file_path=path, ) @@ -134,8 +82,10 @@ def main(): "note_id": note_pl.get("note_id") or fm.get("id"), "title": fm.get("title"), "type": fm.get("type"), - "resolved": {"retriever_weight": rw, "chunk_profile": cp}, - "note_payload": {k: note_pl.get(k) for k in ("retriever_weight","chunk_profile")}, + "note_payload": { + "retriever_weight": note_pl.get("retriever_weight"), + "chunk_profile": note_pl.get("chunk_profile") + }, "chunks_summary": { "count": len(chunk_pls), "first": [