"""
FILE: app/core/ingestion/ingestion_note_payload.py
DESCRIPTION: Builds the JSON object (payload) for mindnet_notes.
FEATURES: Multi-hash (body/full), config fix for chunking_profile.
VERSION: 2.4.0
"""
from __future__ import annotations

from collections.abc import Mapping
from typing import Any, Dict, Tuple, Optional
import os
import json
import pathlib
import hashlib

try:
    import yaml
except ImportError:
    # Nothing in this module's visible code references yaml; the import is
    # kept for compatibility, but a missing PyYAML must not make payload
    # building unimportable.
    yaml = None

# Attributes copied from a non-dict note object into the working dict.
_NOTE_ATTRS = (
    "frontmatter",
    "body",
    "id",
    "note_id",
    "title",
    "path",
    "tags",
    "type",
    "created",
    "modified",
    "date",
)

# Frontmatter keys that feed the "full" hash, in the (sorted) order in which
# they are serialized. Computed once instead of on every call.
_HASH_META_KEYS = tuple(
    sorted(
        (
            "title",
            "type",
            "status",
            "tags",
            "chunking_profile",
            "chunk_profile",
            "retriever_weight",
        )
    )
)

# Hash variants stored in the payload.
_HASH_MODES = ("body", "full")

# Timestamp-like fields copied (stringified) into the payload when present.
_DATE_KEYS = ("created", "modified", "date")

_DEFAULT_NOTE_TYPE = "concept"
_DEFAULT_CHUNK_PROFILE = "sliding_standard"
_DEFAULT_RETRIEVER_WEIGHT = 1.0


def _as_dict(x) -> Dict[str, Any]:
    """Return a plain dict view of a note.

    A dict is shallow-copied. For any other object, the attributes listed in
    ``_NOTE_ATTRS`` that exist and are not ``None`` are collected. If none of
    them is found, the result is ``{"raw": str(x)}``.
    """
    if isinstance(x, dict):
        return dict(x)

    out: Dict[str, Any] = {}
    for attr in _NOTE_ATTRS:
        if not hasattr(x, attr):
            continue
        val = getattr(x, attr)
        if val is not None:
            out[attr] = val

    if not out:
        out["raw"] = str(x)
    return out


def _ensure_list(x) -> list:
    """Normalize a value to a list of strings.

    ``None`` becomes ``[]``; a list, set or tuple is converted element-wise
    with ``str``; any other value becomes a one-element list.
    """
    if x is None:
        return []
    if isinstance(x, (list, set, tuple)):
        return [str(item) for item in x]
    return [str(x)]


def _compute_hash(content: str) -> str:
    """Return the SHA-256 hex digest of ``content`` (UTF-8 encoded).

    Empty content yields an empty string rather than the digest of ``b""``.
    """
    if not content:
        return ""
    return hashlib.sha256(content.encode("utf-8")).hexdigest()


def _frontmatter_of(n: Dict[str, Any]) -> Any:
    """Return the note's frontmatter mapping, or ``{}`` if absent or invalid.

    A frontmatter value that is not a mapping (e.g. a bare string) is
    treated as empty instead of failing later on ``.get``.
    """
    fm = n.get("frontmatter")
    return fm if isinstance(fm, Mapping) else {}


def _coerce_weight(value: Any) -> float:
    """Convert a frontmatter ``retriever_weight`` to ``float``.

    ``None`` (key present but without a value) maps to the default weight.
    Any other value goes through ``float()``, which still raises
    ``TypeError`` / ``ValueError`` for values that cannot be converted.
    """
    if value is None:
        return _DEFAULT_RETRIEVER_WEIGHT
    return float(value)


def _get_hash_source_content(n: Dict[str, Any], mode: str) -> str:
    """Build the string that is hashed for the given ``mode``.

    ``"body"`` (and any unknown mode) uses the note body only. ``"full"``
    prefixes the body with the non-``None`` frontmatter values of
    ``_HASH_META_KEYS``, formatted as ``key:value`` and joined by ``|``.
    """
    body = str(n.get("body") or "")
    if mode != "full":
        return body

    fm = _frontmatter_of(n)
    meta_parts = [
        f"{key}:{fm.get(key)}"
        for key in _HASH_META_KEYS
        if fm.get(key) is not None
    ]
    # The layout (including the leading space) is part of the hash input and
    # must stay byte-identical, otherwise every stored "full" hash changes.
    return f" {'|'.join(meta_parts)}||{body}"


def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]:
    """Build the note payload including the multi-hash section.

    Args:
        note: A dict, or an object exposing note attributes (see
            ``_as_dict``).
        *args: Accepted for call compatibility; not used.
        **kwargs: Optional settings read by this function:
            ``hash_source`` (default ``"parsed"``) and ``hash_normalize``
            (default ``"canonical"``) only label the hash keys; ``path`` is
            the fallback when the note itself carries no path. Other keys
            (e.g. ``types_cfg``) are accepted but not used here.

    Returns:
        A dict with ``note_id``, ``title``, ``type``, ``path``,
        ``retriever_weight``, ``chunk_profile`` and ``hashes`` (one entry per
        mode, keyed ``"<mode>:<hash_source>:<hash_normalize>"``), plus
        ``tags``, ``aliases``, ``created``, ``modified``, ``date`` and
        ``fulltext`` when the corresponding values are present and truthy.

    Raises:
        TypeError, ValueError: If ``retriever_weight`` is set to a value
            that ``float()`` cannot convert.
    """
    n = _as_dict(note)
    fm = _frontmatter_of(n)

    hash_source = kwargs.get("hash_source", "parsed")
    hash_normalize = kwargs.get("hash_normalize", "canonical")

    # Frontmatter wins over the note-level type.
    note_type = str(fm.get("type") or n.get("type") or _DEFAULT_NOTE_TYPE)

    # Weights & profiles. "chunking_profile" takes precedence over
    # "chunk_profile" when both are present.
    retriever_weight = _coerce_weight(fm.get("retriever_weight"))
    chunk_profile = (
        fm.get("chunking_profile")
        or fm.get("chunk_profile")
        or _DEFAULT_CHUNK_PROFILE
    )

    payload: Dict[str, Any] = {
        "note_id": n.get("note_id") or n.get("id") or fm.get("id"),
        "title": n.get("title") or fm.get("title") or "",
        "type": note_type,
        "path": str(n.get("path") or kwargs.get("path") or ""),
        "retriever_weight": retriever_weight,
        "chunk_profile": chunk_profile,
        "hashes": {
            f"{mode}:{hash_source}:{hash_normalize}": _compute_hash(
                _get_hash_source_content(n, mode)
            )
            for mode in _HASH_MODES
        },
    }

    tags = fm.get("tags") or n.get("tags")
    if tags:
        payload["tags"] = _ensure_list(tags)

    aliases = fm.get("aliases")
    if aliases:
        payload["aliases"] = _ensure_list(aliases)

    for key in _DATE_KEYS:
        value = fm.get(key) or n.get(key)
        if value:
            payload[key] = str(value)

    if n.get("body"):
        payload["fulltext"] = str(n["body"])

    return payload