#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ app/core/note_payload.py (Mindnet V2 — types.yaml authoritative) - retriever_weight und chunk_profile werden aus config/types.yaml gelesen. - Reihenfolge: types. > defaults.* > Fallbacks (1.0 / "default"). - Frontmatter-Overrides für diese beiden Felder werden bewusst IGNORIERT. - edge_defaults (falls in types.yaml definiert) werden in die Note-Payload gespiegelt. - MINDNET_TYPES_FILE kann absoluten Pfad liefern, sonst ./config/types.yaml. """ from __future__ import annotations from typing import Any, Dict, Optional, List import os, yaml, hashlib, datetime # ----------------------- helpers ----------------------- def _env(n: str, d: Optional[str]=None) -> str: v = os.getenv(n) return v if v is not None else (d or "") def _load_types() -> dict: p = _env("MINDNET_TYPES_FILE", "./config/types.yaml") try: with open(p, "r", encoding="utf-8") as f: return yaml.safe_load(f) or {} except Exception: return {} def _get_types_map(reg: dict) -> dict: if isinstance(reg, dict) and isinstance(reg.get("types"), dict): return reg["types"] return reg if isinstance(reg, dict) else {} def _get_defaults(reg: dict) -> dict: if isinstance(reg, dict) and isinstance(reg.get("defaults"), dict): return reg["defaults"] # alias "global" erlaubt if isinstance(reg, dict) and isinstance(reg.get("global"), dict): return reg["global"] return {} def _resolve_chunk_profile(note_type: str, reg: dict) -> str: types = _get_types_map(reg) if isinstance(types, dict): t = types.get(note_type, {}) if isinstance(t, dict) and isinstance(t.get("chunk_profile"), str): return t["chunk_profile"] defs = _get_defaults(reg) if isinstance(defs, dict) and isinstance(defs.get("chunk_profile"), str): return defs["chunk_profile"] return "default" def _as_float(x: Any) -> Optional[float]: try: return float(x) except Exception: return None def _resolve_retriever_weight(note_type: str, reg: dict) -> float: types = _get_types_map(reg) if isinstance(types, dict): t = types.get(note_type, {}) if isinstance(t, dict) and (t.get("retriever_weight") is not None): v = _as_float(t.get("retriever_weight")) if v is not None: return float(v) defs = _get_defaults(reg) if isinstance(defs, dict) and (defs.get("retriever_weight") is not None): v = _as_float(defs.get("retriever_weight")) if v is not None: return float(v) return 1.0 # ------------------- public API ------------------- def make_note_payload(parsed_note: Any, *, vault_root: str, hash_mode: str = "body", hash_normalize: str = "canonical", hash_source: str = "parsed", file_path: Optional[str] = None) -> Dict[str, Any]: """ Erwartet ein Parsed-Objekt mit Attributen: - frontmatter (dict) - body (str) - path (optional) Liefert Note-Payload mit deterministischer note_id und Typ-Werten aus types.yaml. """ fm = (getattr(parsed_note, "frontmatter", None) or {}) if parsed_note else {} title = fm.get("title") or "" note_type = fm.get("type") or "concept" note_id = fm.get("id") or _stable_id_from_path_or_title(file_path or "", title) tags = fm.get("tags") or [] if isinstance(tags, str): tags = [tags] # types.yaml authoritative: reg = _load_types() chunk_profile = _resolve_chunk_profile(note_type, reg) retriever_weight = _resolve_retriever_weight(note_type, reg) # edge_defaults (falls vorhanden) edge_defaults = None types = _get_types_map(reg) if isinstance(types, dict): t = types.get(note_type, {}) if isinstance(t, dict) and isinstance(t.get("edge_defaults"), list): edge_defaults = t["edge_defaults"] updated = _ts_to_int(fm.get("updated")) payload: Dict[str, Any] = { "note_id": note_id, "type": note_type, "title": title, "tags": tags, "updated": updated if updated is not None else 0, "path": file_path or "", "chunk_profile": chunk_profile, "retriever_weight": float(retriever_weight), } if edge_defaults is not None: payload["edge_defaults"] = edge_defaults return payload # ------------------- internal utilities ------------------- def _stable_id_from_path_or_title(path: str, title: str) -> str: base = path or title or "" if not base: base = "note" h = hashlib.sha1(base.encode("utf-8")).hexdigest()[:6] # title-sourced; in V2 typischerweise durch Frontmatter id ersetzt return f"auto-{h}" def _ts_to_int(val: Any) -> Optional[int]: # akzeptiert YYYY-MM-DD oder epoch int; None bei Fehler if val is None: return None if isinstance(val, int): return val if isinstance(val, float): return int(val) if isinstance(val, str): val = val.strip() # YYYY-MM-DD try: dt = datetime.date.fromisoformat(val) return int(datetime.datetime(dt.year, dt.month, dt.day).timestamp()) except Exception: pass # epoch string try: return int(val) except Exception: return None return None