#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Module: app/core/note_payload.py
# Version: 1.8.0
# Date: 2025-11-08
# Changes:
#  - 'retriever_weight' (float; default via ENV MINDNET_DEFAULT_RETRIEVER_WEIGHT,
#    otherwise 1.0) is taken from the frontmatter into the note payload.
#  - 'chunk_profile' (if present) is carried over.
#  - Hash logic unchanged, compatible with 1.7.0.

from __future__ import annotations

import argparse
import datetime
import hashlib
import json
import math
import os
from typing import Any, Dict, Optional, Tuple

try:
    from app.core.parser import read_markdown, extract_wikilinks, FRONTMATTER_RE
except Exception:  # pragma: no cover
    from .parser import read_markdown, extract_wikilinks, FRONTMATTER_RE  # type: ignore


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _json_fallback(value: Any) -> Any:
    """Return a deterministic JSON-compatible stand-in for *value*.

    Used as the ``default=`` hook of ``json.dumps`` so that frontmatter values
    the JSON encoder cannot handle natively do not abort hashing.
    """
    if isinstance(value, (datetime.date, datetime.time)):
        # Covers datetime.datetime as well (it subclasses datetime.date).
        return value.isoformat()
    if isinstance(value, (set, frozenset)):
        # Set iteration order is not stable across runs; sort for a stable hash.
        return sorted(value, key=str)
    return str(value)


def _canon_frontmatter(fm: Dict[str, Any]) -> str:
    """Serialize frontmatter to a canonical JSON string (sorted keys, compact).

    Values the JSON encoder does not support are converted by
    ``_json_fallback``; for frontmatter that is already JSON-serializable the
    output is identical to a plain ``json.dumps`` with the same options.
    """
    return json.dumps(
        fm or {},
        ensure_ascii=False,
        separators=(",", ":"),
        sort_keys=True,
        default=_json_fallback,
    )


def _normalize_body(body: str, mode: str) -> str:
    """Normalize *body* before hashing.

    With ``mode == "none"`` the body is returned untouched (``None`` becomes
    ``""``). Any other mode converts CRLF/CR line endings to LF and strips
    trailing whitespace from every line.
    """
    if mode == "none":
        return body if body is not None else ""
    text = (body or "").replace("\r\n", "\n").replace("\r", "\n")
    return "\n".join(line.rstrip() for line in text.split("\n"))


def _resolve_hash_mode(explicit: Optional[str]) -> str:
    """Resolve the hash mode to one of ``"body"``, ``"frontmatter"``, ``"full"``.

    An explicit value wins; otherwise the environment variables
    MINDNET_HASH_MODE and MINDNET_HASH_COMPARE are consulted in that order.
    Unknown or empty values resolve to ``"body"``.
    """
    if explicit:
        val = explicit.strip().lower()
    else:
        val = (
            os.environ.get("MINDNET_HASH_MODE")
            or os.environ.get("MINDNET_HASH_COMPARE")
            or "body"
        ).strip().lower()
    if val in ("full", "fulltext", "body+frontmatter", "bodyplusfrontmatter"):
        return "full"
    if val in ("frontmatter", "fm"):
        return "frontmatter"
    return "body"


def _read_raw_body_from_file(file_path: Optional[str]) -> Tuple[str, Dict[str, Any]]:
    """Read a Markdown file and split it into ``(body, frontmatter)``.

    Returns ``("", {})`` when the path is missing, does not exist, or cannot be
    read as UTF-8. If ``FRONTMATTER_RE`` does not match at the start of the
    file, the whole text is returned as body with empty frontmatter. The
    frontmatter text is parsed with ``yaml.safe_load``; a parse failure (or a
    missing ``yaml`` package) yields empty frontmatter.
    """
    if not file_path or not os.path.exists(file_path):
        return "", {}
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            raw = f.read()
    except (OSError, ValueError):
        # ValueError covers UnicodeDecodeError for files that are not UTF-8.
        return "", {}

    m = FRONTMATTER_RE.match(raw)
    if not m:
        return raw, {}

    fm_txt = m.group(1)
    try:
        import yaml  # lazy: only needed when raw frontmatter is requested
        fm = yaml.safe_load(fm_txt) or {}
    except Exception:
        fm = {}
    # safe_load may return a scalar or list for malformed frontmatter;
    # callers expect a mapping.
    if not isinstance(fm, dict):
        fm = {}
    return raw[m.end():], fm


def _sha256(s: str) -> str:
    """Return the hex SHA-256 digest of *s* encoded as UTF-8."""
    return hashlib.sha256(s.encode("utf-8")).hexdigest()


def _hash_for(mode: str, *, body: str, fm: Dict[str, Any], normalize: str) -> str:
    """Compute the hash for one mode.

    ``"frontmatter"`` hashes the canonical frontmatter JSON, ``"full"`` hashes
    the normalized body joined with the frontmatter JSON by the ``--FM--``
    separator line, and any other mode hashes the normalized body only.
    """
    body_n = _normalize_body(body or "", normalize)
    fm_s = _canon_frontmatter(fm or {})
    if mode == "frontmatter":
        return _sha256(fm_s)
    if mode == "full":
        return _sha256(body_n + "\n--FM--\n" + fm_s)
    # default: body
    return _sha256(body_n)


def _to_float(val: Any, default: float) -> float:
    """Convert *val* to a finite float, falling back to *default*.

    ``None``, unparseable values and non-finite results (NaN, +/-inf) yield
    ``float(default)``. Strings may use a decimal comma (``"0,5"``).
    """
    try:
        if val is None:
            return float(default)
        if isinstance(val, (int, float)):
            result = float(val)
        else:
            result = float(str(val).strip().replace(",", "."))
    except (TypeError, ValueError, OverflowError):
        return float(default)
    # A NaN/inf weight is not a usable value and is not valid strict JSON.
    return result if math.isfinite(result) else float(default)


# ---------------------------------------------------------------------------
# Core function
# ---------------------------------------------------------------------------

def make_note_payload(
    parsed: Any,
    vault_root: Optional[str] = None,
    *,
    hash_mode: Optional[str] = None,
    hash_normalize: Optional[str] = None,
    hash_source: Optional[str] = None,
    file_path: Optional[str] = None,
) -> Dict[str, Any]:
    """Build the note payload including multiple hashes and frontmatter fields.

    Args:
        parsed: Either a dict with the keys ``frontmatter``, ``body`` and
            ``path`` or an object exposing attributes of the same names.
        vault_root: If given, ``path`` is made relative to this directory
            (forward slashes, no leading slash).
        hash_mode: ``body`` | ``frontmatter`` | ``full``; resolved by
            ``_resolve_hash_mode`` when omitted.
        hash_normalize: ``canonical`` | ``none``; defaults to ENV
            MINDNET_HASH_NORMALIZE, otherwise ``canonical``.
        hash_source: ``parsed`` | ``raw``; defaults to ENV MINDNET_HASH_SOURCE,
            otherwise ``parsed``. With ``raw`` the body is re-read from disk
            and frontmatter keys found only in the raw file are added.
        file_path: File to read in ``raw`` mode; falls back to the parsed path.

    Returns:
        The payload dict. ``hashes`` always contains the three
        ``<mode>:parsed:canonical`` entries plus, when the active configuration
        differs, three ``<mode>:<source>:<normalize>`` entries.
    """
    # Accept either a dict or an object.
    if isinstance(parsed, dict):
        fm = parsed.get("frontmatter") or {}
        body_parsed = parsed.get("body") or ""
        path = parsed.get("path") or ""
    else:
        fm = getattr(parsed, "frontmatter", {}) or {}
        body_parsed = getattr(parsed, "body", "") or ""
        path = getattr(parsed, "path", "") or ""

    # Make the target path relative to the vault root (best effort).
    rel_path = path
    if vault_root:
        try:
            rel_path = os.path.relpath(path, vault_root).replace("\\", "/").lstrip("/")
        except (TypeError, ValueError, OSError):
            # e.g. empty path or different drives on Windows: keep path as is.
            pass

    # Resolve configuration. Empty or whitespace-only settings fall back to the
    # defaults so that hash labels never contain an empty component.
    mode_resolved = _resolve_hash_mode(hash_mode)  # body|frontmatter|full
    src = (
        hash_source or os.environ.get("MINDNET_HASH_SOURCE") or "parsed"
    ).strip().lower() or "parsed"  # parsed|raw
    norm = (
        hash_normalize or os.environ.get("MINDNET_HASH_NORMALIZE") or "canonical"
    ).strip().lower() or "canonical"  # canonical|none

    # Load the body source.
    if src == "raw":
        raw_body, raw_fm = _read_raw_body_from_file(file_path or path)
        if isinstance(raw_fm, dict) and raw_fm:
            # Parsed frontmatter wins; raw values only fill in missing keys.
            merged_fm = dict(fm)
            for k, v in raw_fm.items():
                merged_fm.setdefault(k, v)
            fm = merged_fm
        body_for_hash = raw_body
    else:
        body_for_hash = body_parsed

    # --- 1) Always produce the standard triple (parsed:canonical) ---
    std_src = "parsed"
    std_norm = "canonical"
    std_hashes: Dict[str, str] = {
        f"{m}:{std_src}:{std_norm}": _hash_for(
            m, body=body_parsed, fm=fm, normalize=std_norm
        )
        for m in ("body", "frontmatter", "full")
    }

    # Convenience fields (for tools)
    hash_body = std_hashes["body:parsed:canonical"]
    hash_frontmatter = std_hashes["frontmatter:parsed:canonical"]
    hash_full = std_hashes["full:parsed:canonical"]

    # --- 2) Hashes for the *current* configuration (if it differs) ---
    cur_hashes: Dict[str, str] = {}
    if not (src == std_src and norm == std_norm):
        for m in ("body", "frontmatter", "full"):
            cur_hashes[f"{m}:{src}:{norm}"] = _hash_for(
                m, body=body_for_hash, fm=fm, normalize=norm
            )

    # --- 3) Current mode for the backwards-compat fields ---
    current_hash = _hash_for(mode_resolved, body=body_for_hash, fm=fm, normalize=norm)
    hash_signature = f"{mode_resolved}:{src}:{norm}:{current_hash}"

    # Wikilinks (note level), de-duplicated while keeping first-seen order.
    refs = list(dict.fromkeys(extract_wikilinks(body_parsed))) if body_parsed else []

    # Defaults & casting for the retriever weight and chunk profile.
    default_rw = _to_float(os.environ.get("MINDNET_DEFAULT_RETRIEVER_WEIGHT", 1.0), 1.0)
    fm_rw = _to_float(fm.get("retriever_weight"), default_rw)
    fm_chunk_profile = fm.get("chunk_profile") or fm.get("profile") or None

    payload: Dict[str, Any] = {
        "note_id": fm.get("id") or fm.get("note_id"),
        "title": fm.get("title"),
        "type": fm.get("type"),
        "status": fm.get("status"),
        "created": fm.get("created"),
        "updated": fm.get("updated"),
        "path": rel_path or fm.get("path"),
        "tags": fm.get("tags"),
        # Full text for lossless export
        "fulltext": body_parsed,
        # Backwards-compat:
        "hash_fulltext": current_hash,
        "hash_signature": hash_signature,
        # Option C: multiple hashes
        "hashes": {**std_hashes, **cur_hashes},
        "hash_body": hash_body,
        "hash_frontmatter": hash_frontmatter,
        "hash_full": hash_full,
        # Fallback references
        "references": refs,
        # Retriever weight (always present)
        "retriever_weight": fm_rw,
    }
    if fm_chunk_profile is not None:
        payload["chunk_profile"] = str(fm_chunk_profile)

    # Optional frontmatter fields are copied only when present.
    for k in ("area", "project", "source", "lang", "slug", "aliases"):
        if k in fm:
            payload[k] = fm[k]

    return payload


# ---------------------------------------------------------------------------
# CLI - visual inspection
# ---------------------------------------------------------------------------

def _cli() -> None:
    """Parse a Markdown file and optionally print its note payload as JSON."""
    ap = argparse.ArgumentParser(description="Note-Payload aus Markdown erzeugen und anzeigen")
    ap.add_argument("--from-file", dest="src", required=True)
    ap.add_argument("--vault-root", dest="vault_root", default=None)
    ap.add_argument("--print", dest="do_print", action="store_true")
    ap.add_argument("--hash-mode", choices=["body", "frontmatter", "full"], default=None)
    ap.add_argument("--hash-normalize", choices=["canonical", "none"], default=None)
    ap.add_argument("--hash-source", choices=["parsed", "raw"], default=None)
    args = ap.parse_args()

    parsed = read_markdown(args.src)
    payload = make_note_payload(
        parsed,
        vault_root=args.vault_root,
        hash_mode=args.hash_mode,
        hash_normalize=args.hash_normalize,
        hash_source=args.hash_source,
        file_path=args.src,
    )
    if args.do_print:
        # default=str keeps the inspection output usable when frontmatter
        # carries values such as dates that JSON cannot encode natively.
        print(json.dumps(payload, ensure_ascii=False, indent=2, default=str))


if __name__ == "__main__":  # pragma: no cover
    _cli()