scripts/payload_dryrun.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
22d08afe2d
commit
3d74eff224
|
|
@ -1,13 +1,13 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""
|
"""
|
||||||
scripts/payload_dryrun.py
|
scripts/payload_dryrun.py (zeigt, was VOR dem Upsert tatsächlich in den Payloads steht)
|
||||||
(see docstring inside for usage)
|
- KEIN Überschreiben der Note-Payload mehr
|
||||||
|
- types.yaml ist maßgeblich (gemäß app/core/note_payload.py & chunk_payload.py)
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
import argparse, os, json, yaml, re
|
import argparse, os, json
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
|
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
|
||||||
from app.core.note_payload import make_note_payload
|
from app.core.note_payload import make_note_payload
|
||||||
|
|
@ -18,54 +18,6 @@ try:
|
||||||
except Exception:
|
except Exception:
|
||||||
from app.core.edges import build_edges_for_note # type: ignore
|
from app.core.edges import build_edges_for_note # type: ignore
|
||||||
|
|
||||||
def _env(n: str, d: Optional[str] = None) -> str:
    """Return the value of environment variable *n* as a string.

    Args:
        n: Name of the environment variable.
        d: Fallback used when the variable is not set at all.

    Returns:
        The variable's value when it is set (an empty value is returned
        as-is), otherwise ``d``; a missing or empty ``d`` yields ``""``.
    """
    value = os.getenv(n)
    if value is not None:
        # A variable that is set but empty is respected; the fallback is
        # only for variables that are absent.
        return value
    return d or ""
|
|
||||||
|
|
||||||
def load_types() -> dict:
    """Load the type registry from the YAML file named by ``MINDNET_TYPES_FILE``.

    The path defaults to ``./config/types.yaml`` when the environment
    variable is not set. Loading is best-effort: any problem reading or
    parsing the file results in an empty registry instead of an exception.

    Returns:
        The parsed YAML document when its top level is a mapping, otherwise
        an empty dict (missing/unreadable file, invalid YAML, empty document,
        or a top-level list/scalar).
    """
    types_path = _env("MINDNET_TYPES_FILE", "./config/types.yaml")

    try:
        # Imported lazily so that a missing PyYAML installation degrades to
        # "no registry" like every other load failure.
        import yaml
    except ImportError:
        return {}

    try:
        with open(types_path, "r", encoding="utf-8") as fh:
            loaded = yaml.safe_load(fh)
    except (OSError, ValueError, yaml.YAMLError):
        # OSError: missing/unreadable file; ValueError: undecodable bytes or
        # an invalid path string; YAMLError: malformed document.
        return {}

    # Callers use the registry via ``.get``; hand back a dict even when the
    # document's top level is a list or scalar.
    return loaded if isinstance(loaded, dict) else {}
|
|
||||||
|
|
||||||
def _deep_get(root: Any, path: str) -> Any:
    """Follow a dot-separated key path through nested dicts.

    Args:
        root: Object to start from; anything that is not a dict ends the
            lookup immediately.
        path: Keys joined by ``"."``, e.g. ``"defaults.retriever.weight"``.

    Returns:
        The value found at the end of the path, or ``None`` as soon as a
        level is not a dict or does not contain the next key. A stored
        ``None`` is indistinguishable from a missing key.
    """
    node = root
    for key in path.split("."):
        if not isinstance(node, dict) or key not in node:
            return None
        node = node[key]
    return node
|
|
||||||
|
|
||||||
def eff_chunk_profile(note_type: str, fm: Dict[str, Any], reg: dict) -> Optional[str]:
    """Resolve the chunk profile that applies to a note.

    Resolution order:
      1. ``fm["chunk_profile"]`` when it is a string.
      2. ``chunk_profile`` of the entry for ``note_type`` in the registry.
         The entry is looked up under ``reg["types"]`` when that is a dict,
         otherwise directly in ``reg``.

    Args:
        note_type: Type name used as the key into the registry.
        fm: Frontmatter mapping of the note.
        reg: Type registry; a value that is not a dict is treated as empty.

    Returns:
        The profile name, or ``None`` when neither source provides a string.
    """
    from_frontmatter = fm.get("chunk_profile")
    if isinstance(from_frontmatter, str):
        return from_frontmatter

    # Guard against a registry that is not a mapping (e.g. a YAML file whose
    # top level is a list) instead of failing on ``.get``.
    registry = reg if isinstance(reg, dict) else {}
    nested = registry.get("types")
    types = nested if isinstance(nested, dict) else registry

    entry = types.get(note_type, {})
    if isinstance(entry, dict):
        profile = entry.get("chunk_profile")
        if isinstance(profile, str):
            return profile
    return None
|
|
||||||
|
|
||||||
def eff_retriever_weight(note_type: str, fm: Dict[str, Any], reg: dict) -> float:
    """Resolve the retriever weight that applies to a note.

    Resolution order (the first value convertible to ``float`` wins):
      1. ``fm["retriever_weight"]``
      2. ``<note_type>.retriever_weight``
      3. ``<note_type>.retriever.weight``
      4. ``<note_type>.retrieval.weight``
      5. ``defaults.retriever_weight``
      6. ``defaults.retriever.weight``
      7. ``global.retriever_weight``
      8. ``global.retriever.weight``

    Entries 2-8 are looked up under ``reg["types"]`` when that is a dict,
    otherwise directly in ``reg``. ``note_type`` is used as a single key, so
    type names containing ``"."`` resolve correctly.

    Args:
        note_type: Type name used as the key into the registry.
        fm: Frontmatter mapping of the note.
        reg: Type registry; a value that is not a dict is treated as empty.

    Returns:
        The resolved weight, or ``1.0`` when no source yields a number.
    """

    def _as_float(value: Any) -> Optional[float]:
        # Missing or non-numeric values are skipped, not treated as errors.
        if value is None:
            return None
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return None

    def _lookup(node: Any, keys: tuple) -> Any:
        # Walk nested dicts key by key; None as soon as a level is missing.
        for key in keys:
            if not isinstance(node, dict) or key not in node:
                return None
            node = node[key]
        return node

    weight = _as_float(fm.get("retriever_weight"))
    if weight is not None:
        return weight

    # Guard against a registry that is not a mapping instead of failing on
    # ``.get``.
    registry = reg if isinstance(reg, dict) else {}
    nested = registry.get("types")
    types = nested if isinstance(nested, dict) else registry

    candidates = (
        (note_type, "retriever_weight"),
        (note_type, "retriever", "weight"),
        (note_type, "retrieval", "weight"),
        ("defaults", "retriever_weight"),
        ("defaults", "retriever", "weight"),
        ("global", "retriever_weight"),
        ("global", "retriever", "weight"),
    )
    for keys in candidates:
        weight = _as_float(_lookup(types, keys))
        if weight is not None:
            return weight
    return 1.0
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
ap = argparse.ArgumentParser()
|
ap = argparse.ArgumentParser()
|
||||||
ap.add_argument("--vault", required=True)
|
ap.add_argument("--vault", required=True)
|
||||||
|
|
@ -73,7 +25,6 @@ def main():
|
||||||
ap.add_argument("--with-edges", action="store_true")
|
ap.add_argument("--with-edges", action="store_true")
|
||||||
args = ap.parse_args()
|
args = ap.parse_args()
|
||||||
|
|
||||||
reg = load_types()
|
|
||||||
root = os.path.abspath(args.vault)
|
root = os.path.abspath(args.vault)
|
||||||
|
|
||||||
files: List[str] = []
|
files: List[str] = []
|
||||||
|
|
@ -97,17 +48,13 @@ def main():
|
||||||
if args.note_id and fm.get("id") != args.note_id:
|
if args.note_id and fm.get("id") != args.note_id:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Note payload
|
# Note-Payload exakt so, wie der Importer ihn baut (types.yaml maßgeblich)
|
||||||
note_pl = make_note_payload(parsed, vault_root=root, hash_mode="body", hash_normalize="canonical", hash_source="parsed", file_path=path)
|
note_pl = make_note_payload(parsed,
|
||||||
|
vault_root=root,
|
||||||
note_type = fm.get("type") or "concept"
|
hash_mode="body",
|
||||||
cp = eff_chunk_profile(note_type, fm, reg)
|
hash_normalize="canonical",
|
||||||
rw = eff_retriever_weight(note_type, fm, reg)
|
hash_source="parsed",
|
||||||
|
file_path=path)
|
||||||
# Das macht der Importer ebenfalls: explizite Spiegelung in Note-Payload
|
|
||||||
if cp is not None:
|
|
||||||
note_pl["chunk_profile"] = cp
|
|
||||||
note_pl["retriever_weight"] = rw
|
|
||||||
|
|
||||||
body_text = getattr(parsed, "body", "") or ""
|
body_text = getattr(parsed, "body", "") or ""
|
||||||
chunks = assemble_chunks(fm["id"], body_text, fm.get("type","concept"))
|
chunks = assemble_chunks(fm["id"], body_text, fm.get("type","concept"))
|
||||||
|
|
@ -121,12 +68,13 @@ def main():
|
||||||
"note_id": note_pl.get("note_id"),
|
"note_id": note_pl.get("note_id"),
|
||||||
"tags": fm.get("tags"),
|
"tags": fm.get("tags"),
|
||||||
}
|
}
|
||||||
|
# make_chunk_payloads bestimmt Werte ebenfalls aus types.yaml (Frontmatter wird ignoriert)
|
||||||
chunk_pls = make_chunk_payloads(
|
chunk_pls = make_chunk_payloads(
|
||||||
chunk_note,
|
chunk_note,
|
||||||
note_pl["path"],
|
note_pl["path"],
|
||||||
chunks,
|
chunks,
|
||||||
note_text=body_text,
|
note_text=body_text,
|
||||||
types_cfg=(reg.get("types") if isinstance(reg, dict) and isinstance(reg.get("types"), dict) else reg if isinstance(reg, dict) else {}),
|
types_cfg=None, # Loader aus Datei; kein Override von außen
|
||||||
file_path=path,
|
file_path=path,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -134,8 +82,10 @@ def main():
|
||||||
"note_id": note_pl.get("note_id") or fm.get("id"),
|
"note_id": note_pl.get("note_id") or fm.get("id"),
|
||||||
"title": fm.get("title"),
|
"title": fm.get("title"),
|
||||||
"type": fm.get("type"),
|
"type": fm.get("type"),
|
||||||
"resolved": {"retriever_weight": rw, "chunk_profile": cp},
|
"note_payload": {
|
||||||
"note_payload": {k: note_pl.get(k) for k in ("retriever_weight","chunk_profile")},
|
"retriever_weight": note_pl.get("retriever_weight"),
|
||||||
|
"chunk_profile": note_pl.get("chunk_profile")
|
||||||
|
},
|
||||||
"chunks_summary": {
|
"chunks_summary": {
|
||||||
"count": len(chunk_pls),
|
"count": len(chunk_pls),
|
||||||
"first": [
|
"first": [
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user