#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Module: app/core/note_payload.py
Version: 2.0.0

Purpose
-------
Build a robust note payload. The values ``retriever_weight``,
``chunk_profile`` and ``edge_defaults`` are resolved with this priority:

1) Frontmatter of the note
2) Type registry (config/types.yaml: types.<type>.*)
3) Registry defaults (config/types.yaml: defaults.*)
4) Environment defaults (MINDNET_DEFAULT_RETRIEVER_WEIGHT /
   MINDNET_DEFAULT_CHUNK_PROFILE)
"""
from __future__ import annotations

import json
import os
import pathlib
from typing import Any, Dict, Optional, Tuple

try:
    import yaml
except Exception:
    # PyYAML is optional: without it the registry file is simply not read.
    yaml = None

# Attributes harvested from object-style notes (e.g. a parsed-note object
# exposing ``.frontmatter`` and ``.path``) when the note is not a mapping.
_NOTE_ATTRS = (
    "frontmatter",
    "note_id",
    "id",
    "title",
    "type",
    "path",
    "tags",
    "created",
    "modified",
    "date",
)


def _as_dict(x: Any) -> Dict[str, Any]:
    """Return *x* as a plain dict.

    Dicts are shallow-copied, falsy values become ``{}``, and anything
    ``dict()`` accepts is converted. Objects that are not mappings are read
    attribute-wise for the keys in ``_NOTE_ATTRS``; if none of those exist
    the result is ``{"raw": str(x)}``.
    """
    if isinstance(x, dict):
        return dict(x)
    try:
        return dict(x or {})
    except Exception:
        pass
    harvested: Dict[str, Any] = {}
    for key in _NOTE_ATTRS:
        value = getattr(x, key, None)
        # Skip methods such as ``str.title`` that merely share a key name.
        if value is not None and not callable(value):
            harvested[key] = value
    return harvested or {"raw": str(x)}


def _pick_args(*args, **kwargs) -> Tuple[Optional[str], Optional[dict]]:
    """Extract ``(path, types_cfg)`` from loosely typed call arguments.

    The path is taken from ``path=``, the legacy ``file_path=`` keyword, or
    the first positional argument. The registry is taken from ``types_cfg=``
    or ``types=``.
    """
    path = (
        kwargs.get("path")
        or kwargs.get("file_path")
        or (args[0] if args else None)
    )
    types_cfg = kwargs.get("types_cfg") or kwargs.get("types") or None
    return path, types_cfg


def _env_float(name: str, default: float) -> float:
    """Read environment variable *name* as float, else return *default*."""
    try:
        return float(os.environ.get(name, default))
    except (TypeError, ValueError):
        return default


def _ensure_list(x: Any) -> list:
    """Normalize *x* into a list of strings (``None`` becomes ``[]``)."""
    if x is None:
        return []
    if isinstance(x, (set, frozenset)):
        # Sets have no stable order; sort so the payload is deterministic.
        return sorted(str(item) for item in x)
    if isinstance(x, (list, tuple)):
        return [str(item) for item in x]
    return [str(x)]


def _first_set(*values: Any) -> Any:
    """Return the first value that is not ``None`` (or ``None`` if all are)."""
    for value in values:
        if value is not None:
            return value
    return None


# ---- Registry loading ------------------------------------------------------

def _load_types_config(explicit_cfg: Optional[dict] = None) -> dict:
    """Return the type registry.

    A non-empty *explicit_cfg* dict wins. Otherwise the file named by
    ``MINDNET_TYPES_FILE`` (default ``./config/types.yaml``) is parsed with
    ``yaml.safe_load``. Any failure yields ``{}``.
    """
    if explicit_cfg and isinstance(explicit_cfg, dict):
        return explicit_cfg
    path = os.getenv("MINDNET_TYPES_FILE") or "./config/types.yaml"
    if not os.path.isfile(path) or yaml is None:
        return {}
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
    except Exception:
        # Deliberately best-effort: a broken registry must not block payloads.
        return {}
    return data if isinstance(data, dict) else {}


def _cfg_for_type(note_type: str, reg: dict) -> dict:
    """Return the registry entry for *note_type*, always as a dict.

    Supports both ``{"types": {...}}`` and a flat ``{type: {...}}`` layout.
    Missing or non-dict entries (e.g. a YAML null) yield ``{}``.
    """
    if not isinstance(reg, dict):
        return {}
    types = reg.get("types") if isinstance(reg.get("types"), dict) else reg
    entry = types.get(note_type) if isinstance(types, dict) else None
    return entry if isinstance(entry, dict) else {}


def _cfg_defaults(reg: dict) -> dict:
    """Return the registry-wide defaults (keys: defaults, default, global)."""
    if not isinstance(reg, dict):
        return {}
    for key in ("defaults", "default", "global"):
        v = reg.get(key)
        if isinstance(v, dict):
            return v
    return {}


# ---- Main API --------------------------------------------------------------

def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]:
    """Build the payload dict for a note.

    Args:
        note: Mapping (or object with matching attributes) that may carry
            ``frontmatter``, ``note_id``/``id``, ``title``, ``type``,
            ``path``, ``tags``, ``created``, ``modified``, ``date``.
        *args: Optional first positional argument is used as the path.
        **kwargs: ``path`` / ``file_path`` give the note path; ``types_cfg``
            / ``types`` give an explicit registry dict. Other keywords are
            accepted and ignored.

    Returns:
        Dict with ``note_id``, ``title``, ``type``, ``path``,
        ``retriever_weight`` (float), ``chunk_profile`` (str) and
        ``edge_defaults`` (list of str), plus ``tags`` and the date fields
        when present.

    Raises:
        TypeError / ValueError: if the payload is not JSON-serializable.
    """
    n = _as_dict(note)
    path_arg, types_cfg_explicit = _pick_args(*args, **kwargs)
    reg = _load_types_config(types_cfg_explicit)

    fm = n.get("frontmatter")
    if not isinstance(fm, dict):
        # Tolerate missing or malformed frontmatter instead of crashing.
        try:
            fm = dict(fm or {})
        except (TypeError, ValueError):
            fm = {}

    note_type = str(fm.get("type") or n.get("type") or "concept")

    cfg_type = _cfg_for_type(note_type, reg)
    cfg_def = _cfg_defaults(reg)

    # Priority: frontmatter > type registry > registry defaults > environment.
    default_rw = _env_float("MINDNET_DEFAULT_RETRIEVER_WEIGHT", 1.0)
    retriever_weight = _first_set(
        fm.get("retriever_weight"),
        cfg_type.get("retriever_weight"),
        cfg_def.get("retriever_weight"),
        default_rw,
    )
    try:
        retriever_weight = float(retriever_weight)
    except (TypeError, ValueError):
        retriever_weight = default_rw

    chunk_profile = _first_set(
        fm.get("chunk_profile"),
        cfg_type.get("chunk_profile"),
        cfg_def.get("chunk_profile"),
        os.environ.get("MINDNET_DEFAULT_CHUNK_PROFILE"),
        "medium",
    )
    if not isinstance(chunk_profile, str):
        chunk_profile = "medium"

    edge_defaults = _ensure_list(
        _first_set(
            fm.get("edge_defaults"),
            cfg_type.get("edge_defaults"),
            cfg_def.get("edge_defaults"),
            [],
        )
    )

    note_id = n.get("note_id") or n.get("id") or fm.get("id")
    title = n.get("title") or fm.get("title") or ""
    path = n.get("path") or path_arg
    if isinstance(path, pathlib.PurePath):
        path = str(path)

    payload = {
        "note_id": note_id,
        "title": title,
        "type": note_type,
        "path": path or "",
        "retriever_weight": retriever_weight,
        "chunk_profile": chunk_profile,
        "edge_defaults": edge_defaults,
    }

    tags = fm.get("tags") or fm.get("keywords") or n.get("tags")
    if tags:
        payload["tags"] = _ensure_list(tags)
    for k in ("created", "modified", "date"):
        v = fm.get(k) or n.get(k)
        if v:
            payload[k] = str(v)

    # JSON round-trip as a hard validation step: raises if any value is not
    # serializable. ensure_ascii=False leaves non-ASCII characters unescaped.
    json.loads(json.dumps(payload, ensure_ascii=False))
    return payload