Dateien nach "app/core" hochladen

2025-11-09 09:51:05 +01:00 · 2025-11-09 09:51:05 +01:00 · 4dcd606c10
commit 4dcd606c10
parent bbd5a7fa48
2 changed files with 372 additions and 341 deletions
--- a/app/core/chunk_payload.py
+++ b/app/core/chunk_payload.py
@ -1,55 +1,105 @@
-
 """
-chunk_payload.py — Mindnet payload builder (Chunks)
-Version: 1.3.0 (2025-11-09)
+chunk_payload.py — v1.4.2
+-------------------------
+Robuste, abwärtskompatible Payload-Erzeugung für Chunks.

-Purpose
-------
-Build Qdrant-compatible JSON payloads for *chunks* of a parsed note.
-Tolerant to different call signatures and accepts both dict-like and object-like inputs.
+Ziele
+- Setzt pro Chunk `text`, `retriever_weight`, `chunk_profile`, `note_id`.
+- Akzeptiert ParsedNote-Objekte *oder* Dicts, inklusive bereits vorsegmentierter .chunks.
+- Verträgt zusätzliche args/kwargs (kompatibel zu älteren Aufrufern).
+- Konfig-Auflösung identisch zu note_payload.py.

-Key features
------------
- Reads type defaults from `config/config.yaml` or `config/types.yaml` (same schema).
- Resolves fields with precedence:
-    Frontmatter > type-defaults > ENV > fallback.
- Sets per chunk:
-    * `note_id`, `note_title`, `type`
-    * `retriever_weight` (float)
-    * `chunk_profile`    (short|medium|long)
-    * `text`             (never empty: falls back to whole note body/text)
-    * `order`, `section`, `start`, `end` (if available)
- Backwards-compatible signature: accepts **kwargs to swallow unknown args.
-
-Input
-----
-`parsed_note` may be:
-  - dict with keys: id, title, body/text, chunks(list), frontmatter(dict), type
-  - object with equivalent attributes
-
-Each chunk may be dict-like or object-like with keys/attrs such as:
-  id, text, order, section, start, end
+Autor: ChatGPT
+Lizenz: MIT
 """
-
 from __future__ import annotations

 import os
+import hashlib
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union

 try:
    import yaml  # type: ignore
 except Exception:  # pragma: no cover
-    yaml = None
+    yaml = None  # type: ignore

-def _get(obj: Any, key: str, default: Any = None) -> Any:
-    if isinstance(obj, dict):
-        return obj.get(key, default)
-    return getattr(obj, key, default)

-def _frontmatter(obj: Any) -> Dict[str, Any]:
-    fm = _get(obj, "frontmatter", {}) or {}
-    return fm if isinstance(fm, dict) else {}
+def _as_dict(note: Any) -> Dict[str, Any]:
+    """Return a plain-dict view of a ParsedNote-like input.
+
+    A dict input is shallow-copied. For any other object the well-known
+    attributes are read first (in a fixed order); afterwards every entry of
+    the instance ``__dict__`` that is not present yet is appended.
+    """
+    if isinstance(note, dict):
+        return dict(note)
+
+    known_attrs = (
+        "note_id", "id", "title", "type", "frontmatter", "meta",
+        "body", "text", "content", "path", "chunks",
+    )
+    result: Dict[str, Any] = {
+        name: getattr(note, name) for name in known_attrs if hasattr(note, name)
+    }
+    # Instance attributes only fill gaps; they never override a known attribute.
+    for name, value in getattr(note, "__dict__", {}).items():
+        result.setdefault(name, value)
+    return result
+
+
+def _load_types_config(search_root: Optional[Union[str, Path]] = None,
+                       preloaded: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+    """Return the type-defaults configuration, loading it from disk if needed.
+
+    Args:
+        search_root: Optional directory that is searched before the current
+            working directory.
+        preloaded: Already-loaded configuration; returned unchanged when it
+            is a dict whose ``"types"`` entry is a dict.
+
+    Returns:
+        The first usable configuration found, otherwise
+        ``{"version": "1.0", "types": {}}``.
+    """
+    fallback: Dict[str, Any] = {"version": "1.0", "types": {}}
+
+    # A "types" entry that is not a mapping cannot be used for lookups
+    # downstream, so it is treated like a missing configuration.
+    if isinstance(preloaded, dict) and isinstance(preloaded.get("types"), dict):
+        return preloaded
+
+    # Without PyYAML no file can be parsed; skip the file-system probing.
+    if yaml is None:
+        return fallback
+
+    roots: List[Path] = []
+    if search_root:
+        roots.append(Path(search_root))
+    roots.append(Path.cwd())
+
+    for root in roots:
+        for candidate in (root / "config.yaml",
+                          root / "config" / "config.yaml",
+                          root / "config" / "types.yaml"):
+            try:
+                if not candidate.is_file():
+                    continue
+                data = yaml.safe_load(candidate.read_text(encoding="utf-8"))
+            except Exception:
+                # Best effort: an unreadable or malformed file must not abort
+                # payload creation, so move on to the next candidate.
+                continue
+            if isinstance(data, dict) and isinstance(data.get("types"), dict):
+                return data
+    return fallback
+
+
+def _safe_get(d: Dict[str, Any], key: str, default: Any = None) -> Any:
+    """Look up ``key`` in ``d``; return ``default`` when ``d`` is not a dict."""
+    return d.get(key, default) if isinstance(d, dict) else default
+
+
+def _resolve_type(note_d: Dict[str, Any]) -> str:
+    """Determine the note type.
+
+    Lookup order: frontmatter ``type``, top-level ``type``, ``meta["type"]``.
+    Falls back to ``"concept"`` when none of them yields a truthy value.
+    """
+    # NOTE(review): the value is returned as written (no strip/lower) —
+    # confirm that case-sensitive matching against config keys is intended.
+    frontmatter = note_d.get("frontmatter") or {}
+    note_type = _safe_get(frontmatter, "type")
+    if not note_type:
+        note_type = note_d.get("type")
+    if not note_type:
+        meta = note_d.get("meta")
+        if isinstance(meta, dict):
+            note_type = meta.get("type")
+    if not note_type:
+        return "concept"
+    return str(note_type)
+
+
+def _resolve_note_id(note_d: Dict[str, Any]) -> Optional[str]:
+    """Return the first non-empty string found under ``note_id`` or ``id``.
+
+    Non-string values are ignored; ``None`` is returned when neither key
+    holds a usable identifier.
+    """
+    candidates = (note_d.get(key) for key in ("note_id", "id"))
+    return next((c for c in candidates if isinstance(c, str) and c), None)
+
+
+def _resolve_body(note_d: Dict[str, Any]) -> str:
+    """Return the note body from ``body``, ``text`` or ``content``.
+
+    The first value that is a string with non-whitespace content wins and is
+    returned unmodified; otherwise the empty string is returned.
+    """
+    for field in ("body", "text", "content"):
+        candidate = note_d.get(field)
+        if not isinstance(candidate, str):
+            continue
+        if candidate.strip():
+            return candidate
+    return ""
+
+
+def _resolve_defaults_for_type(types_cfg: Dict[str, Any], typ: str) -> Dict[str, Any]:
+    """Return the defaults configured for note type ``typ``.
+
+    Malformed input never raises: if ``types_cfg``, its ``"types"`` entry or
+    the entry for ``typ`` is not a dict, an empty dict is returned.
+    """
+    if not isinstance(types_cfg, dict):
+        return {}
+    types_map = types_cfg.get("types")
+    # "types" may be a list or scalar in a hand-edited config; calling .get()
+    # on such a value would raise AttributeError.
+    if not isinstance(types_map, dict):
+        return {}
+    defaults = types_map.get(typ)
+    return defaults if isinstance(defaults, dict) else {}
+

 def _coerce_float(val: Any, default: float) -> float:
    try:
@ -57,124 +107,109 @@ def _coerce_float(val: Any, default: float) -> float:
            return default
        if isinstance(val, (int, float)):
            return float(val)
-        if isinstance(val, str) and val.strip():
+        if isinstance(val, str):
            return float(val.strip())
    except Exception:
        pass
    return default

-def _normalize_chunk_profile(val: Any, fallback: str = "medium") -> str:
-    if not isinstance(val, str):
-        return fallback
-    v = val.strip().lower()
-    if v in {"short", "medium", "long"}:
-        return v
-    return fallback

-def _safe_text(s: Any) -> str:
-    if s is None:
-        return ""
+def _compute_retriever_weight(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> float:
+    """Resolve ``retriever_weight`` for a note.
+
+    Precedence: frontmatter > type defaults > environment variable
+    ``MINDNET_DEFAULT_RETRIEVER_WEIGHT`` > ``1.0``. A level counts as unset
+    when the key is missing *or* its value is ``None`` (e.g. an empty YAML
+    entry), so a blank frontmatter key does not hide the type default.
+    """
+    fm = note_d.get("frontmatter")
+    # Frontmatter that is not a mapping carries no usable overrides.
+    fm_value = fm.get("retriever_weight") if isinstance(fm, dict) else None
+    if fm_value is not None:
+        return _coerce_float(fm_value, 1.0)
+
+    type_value = _resolve_defaults_for_type(types_cfg, typ).get("retriever_weight")
+    if type_value is not None:
+        return _coerce_float(type_value, 1.0)
+
+    env_value = os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT")
+    if env_value:
+        return _coerce_float(env_value, 1.0)
+    return 1.0
+
+
+def _compute_chunk_profile(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> str:
+    """Resolve ``chunk_profile`` for a note.
+
+    Precedence: frontmatter > type defaults > environment variable
+    ``MINDNET_DEFAULT_CHUNK_PROFILE`` > ``"medium"``. Values that are ``None``
+    or blank count as unset, so an empty YAML entry is never turned into the
+    literal profile name ``"None"``.
+    """
+    def _usable(value: Any) -> bool:
+        # None and whitespace-only values do not name a profile.
+        return value is not None and bool(str(value).strip())
+
+    fm = note_d.get("frontmatter")
+    fm_value = fm.get("chunk_profile") if isinstance(fm, dict) else None
+    if _usable(fm_value):
+        return str(fm_value)
+
+    type_value = _resolve_defaults_for_type(types_cfg, typ).get("chunk_profile")
+    if _usable(type_value):
+        return str(type_value)
+
+    env_value = os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE")
+    if env_value:
+        return str(env_value)
+    return "medium"
+
+
+def _norm_chunk_text(s: Any) -> str:
+    """Return ``s`` without surrounding whitespace; non-strings yield ``""``."""
    if isinstance(s, str):
-        return s
-    return str(s)
+        return s.strip()
+    return ""

-def _load_types_config(
-    explicit_config: Optional[Dict[str, Any]] = None,
-    search_root: Union[str, Path, None] = None,
-) -> Dict[str, Any]:
-    if explicit_config and isinstance(explicit_config, dict):
-        if "types" in explicit_config and isinstance(explicit_config["types"], dict):
-            return explicit_config
-    if yaml is None:
-        return {"types": {}}
-    candidates: List[Path] = []
-    root = Path(search_root) if search_root else Path.cwd()
-    candidates.append(root / "config" / "config.yaml")
-    candidates.append(root / "config" / "types.yaml")
-    candidates.append(Path.cwd() / "config" / "config.yaml")
-    candidates.append(Path.cwd() / "config" / "types.yaml")
-    for p in candidates:
-        try:
-            if p.exists():
-                import yaml as _y
-                with p.open("r", encoding="utf-8") as f:
-                    loaded = _y.safe_load(f) or {}
-                    if isinstance(loaded, dict) and isinstance(loaded.get("types"), dict):
-                        return {"types": loaded["types"]}
-        except Exception:
-            continue
-    return {"types": {}}

-def _type_defaults(note_type: str, cfg: Dict[str, Any]) -> Dict[str, Any]:
-    return (cfg.get("types") or {}).get(note_type, {}) if isinstance(cfg, dict) else {}
+def _hash(s: str) -> str:
+    """Return the first 12 hex characters of the SHA-1 digest of ``s`` (UTF-8)."""
+    digest = hashlib.sha1()
+    digest.update(s.encode("utf-8"))
+    return digest.hexdigest()[:12]

-def make_chunk_payloads(
-    parsed_note: Any,
-    config: Optional[Dict[str, Any]] = None,
-    **kwargs: Any,
-) -> List[Dict[str, Any]]:
-    search_root = kwargs.get("search_root")
-    fm = _frontmatter(parsed_note)
-    note_type = fm.get("type") or _get(parsed_note, "type") or "concept"
-    note_type = str(note_type).strip().lower()

-    cfg = _load_types_config(config, search_root)
-    defaults = _type_defaults(note_type, cfg)
+def make_chunk_payloads(note: Any, *args, **kwargs) -> List[Dict[str, Any]]:
+    """Build the payloads for all chunks of a note.

-    # Resolve retriever_weight: FM > type-defaults > ENV > 1.0
-    rw = fm.get("retriever_weight")
-    if rw is None:
-        rw = defaults.get("retriever_weight")
-    if rw is None:
-        env_rw = os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT")
-        rw = _coerce_float(env_rw, 1.0)
-    else:
-        rw = _coerce_float(rw, 1.0)
+    Accepted additional kwargs:
+      - types_config: dict shaped like config.yaml
+      - search_root / vault_root: directory used for the config search

-    # Resolve chunk_profile: FM > type-defaults > ENV > medium
-    cp = fm.get("chunk_profile")
-    if cp is None:
-        cp = defaults.get("chunk_profile")
-    if cp is None:
-        cp = os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE", "medium")
-    cp = _normalize_chunk_profile(cp, "medium")
+    *args are ignored (compatibility with older callers).
+
+    Returns:
+        One dict per chunk with the keys ``note_id``, ``chunk_id``, ``text``,
+        ``retriever_weight``, ``chunk_profile`` and ``type``. The list is
+        empty when neither the chunks nor the note body provide any text.
+    """
+    note_d = _as_dict(note)

-    note_id = _get(parsed_note, "id")
-    note_title = _get(parsed_note, "title")
-    body = _get(parsed_note, "body") or _get(parsed_note, "text") or ""
+    types_config = kwargs.get("types_config")
+    search_root = kwargs.get("search_root") or kwargs.get("vault_root")
+    types_cfg = _load_types_config(search_root, types_config)

-    items = _get(parsed_note, "chunks") or []
-    payloads: List[Dict[str, Any]] = []
+    typ = _resolve_type(note_d)
+    note_id = _resolve_note_id(note_d) or ""

-    if not items:
-        items = [{
-            "id": f"{note_id}::0" if note_id else None,
-            "text": body,
-            "order": 0,
-            "section": None,
-            "start": 0,
-            "end": len(body) if isinstance(body, str) else None,
-        }]
+    r_weight = _compute_retriever_weight(note_d, types_cfg, typ)
+    c_profile = _compute_chunk_profile(note_d, types_cfg, typ)

-    for ch in items:
-        text = _safe_text(_get(ch, "text"))
-        if not text:
-            text = _safe_text(body)
+    out: List[Dict[str, Any]] = []

-        payload = {
-            "note_id": note_id,
-            "note_title": note_title,
-            "type": note_type,
-            "retriever_weight": float(rw),
-            "chunk_profile": cp,
-            "text": text,
-            "order": _get(ch, "order"),
-            "section": _get(ch, "section"),
-            "start": _get(ch, "start"),
-            "end": _get(ch, "end"),
-            "chunk_id": _get(ch, "id"),
-        }
-        payload = {k: v for k, v in payload.items() if v is not None}
-        payloads.append(payload)
+    # 1) Use the chunks supplied by the parser, if there are any
+    pre = note_d.get("chunks")
+    if isinstance(pre, list) and pre:
+        for idx, c in enumerate(pre):
+            if isinstance(c, dict):
+                text = _norm_chunk_text(c.get("text") or c.get("body") or c.get("content"))
+            else:
+                # NOTE(review): object-like chunks are only read via ``.text``;
+                # dict chunks additionally try "body"/"content" — confirm.
+                text = _norm_chunk_text(getattr(c, "text", ""))
+            if not text:
+                # Fall back to the note body when the chunk text is empty
+                # NOTE(review): every empty chunk receives the complete note
+                # body, so several empty chunks produce identical texts.
+                text = _resolve_body(note_d)
+            if not text:
+                continue

-    return payloads
+            # idx is the position in the parser's chunk list, so skipped
+            # chunks leave gaps in the numbering.
+            chunk_id = f"{note_id}#{idx:03d}" if note_id else _hash(text)[:8]
+            payload = {
+                "note_id": note_id,
+                "chunk_id": chunk_id,
+                "text": text,
+                "retriever_weight": float(r_weight),
+                "chunk_profile": str(c_profile),
+                "type": typ,
+            }
+            out.append(payload)
+
+    # 2) Otherwise emit a single chunk built from the note body/text
+    if not out:
+        text = _resolve_body(note_d)
+        if text:
+            chunk_id = f"{note_id}#000" if note_id else _hash(text)[:8]
+            out.append({
+                "note_id": note_id,
+                "chunk_id": chunk_id,
+                "text": text,
+                "retriever_weight": float(r_weight),
+                "chunk_profile": str(c_profile),
+                "type": typ,
+            })
+
+    return out
--- a/app/core/note_payload.py
+++ b/app/core/note_payload.py
@ -1,81 +1,100 @@
-
 """
-note_payload.py — Mindnet payload builder (Notes)
-Version: 1.3.0 (2025-11-09)
+note_payload.py — v1.4.2
+------------------------
+Robuste, abwärtskompatible Payload-Erzeugung für Notes.

-Purpose
-------
-Build Qdrant-compatible JSON payloads for *notes* from a parsed Markdown
-representation. The function is tolerant to different call signatures and
-accepts both dict-like and object-like "ParsedNote" inputs.
+Ziele
+- Setzt `retriever_weight`, `chunk_profile`, `edge_defaults` deterministisch.
+- Priorität: Frontmatter > Typ-Defaults (config/config.yaml oder config/types.yaml) > ENV > Fallback.
+- Akzeptiert ParsedNote-Objekte *oder* Dicts.
+- Verträgt zusätzliche kwargs (z. B. vault_root/search_root/cfg).
+- Keine Verwendung nicht-serialisierbarer Typen.

-Key features
------------
- Reads type defaults from `config/config.yaml` or `config/types.yaml` (same schema).
- Resolves fields with the following precedence:
-    Frontmatter > type-defaults > ENV > hard-coded fallback.
- Ensures only JSON-serializable types are included (no sets, Path, callables).
- Sets/normalizes:
-    * `type`              : note type (e.g., concept, task, experience, project)
-    * `retriever_weight`  : float, influences retrieval blending downstream
-    * `chunk_profile`     : short | medium | long (string)
-    * `edge_defaults`     : list[str], used by edge builder outside of this module
- Backwards-compatible signature: accepts **kwargs to swallow unknown args
-  (e.g., vault_root, prefix, ...).
+Hinweis
+- Diese Datei **lädt Konfig** nur opportunistisch (./config/config.yaml oder ./config/types.yaml relativ zum CWD
+  bzw. zu `search_root`/`vault_root`, falls übergeben). Wenn dein Aufrufer bereits eine Konfiguration geladen hat,
+  kann er sie via `types_config` kwarg übergeben (dict wie in deinem Beispiel).

-Expected input (flexible)
-------------------------
-`parsed_note` may be:
-  - dict with keys: id, title, body/text, path, frontmatter (dict), type, ...
-  - object with attributes: id, title, body/text, path, frontmatter, type, ...
-
-Schema for config files
-----------------------
-version: 1.0
-types:
-  concept:
-    chunk_profile: medium
-    edge_defaults: ["references", "related_to"]
-    retriever_weight: 0.33
-  task:
-    chunk_profile: short
-    edge_defaults: ["depends_on", "belongs_to"]
-    retriever_weight: 0.8
-  experience:
-    chunk_profile: medium
-    edge_defaults: ["derived_from", "inspired_by"]
-    retriever_weight: 0.9
-  project:
-    chunk_profile: long
-    edge_defaults: ["references", "depends_on"]
-    retriever_weight: 0.95
+Autor: ChatGPT
+Lizenz: MIT
 """
-
 from __future__ import annotations

-import json
 import os
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, Optional, Union, List

 try:
    import yaml  # type: ignore
-except Exception:  # pragma: no cover
-    yaml = None  # The caller must ensure PyYAML is installed
+except Exception:  # pragma: no cover - yaml ist optional, wir degradieren dann sauber
+    yaml = None  # type: ignore
+

 # ------------------------------
-# Helpers
+# Hilfsfunktionen (keine I/O Magie)
 # ------------------------------

-def _get(obj: Any, key: str, default: Any = None) -> Any:
-    """Get key from dict-like or attribute from object-like."""
-    if isinstance(obj, dict):
-        return obj.get(key, default)
-    return getattr(obj, key, default)
+def _as_dict(note: Any) -> Dict[str, Any]:
+    """Return a plain-dict view of a ParsedNote-like input.
+
+    A dict input is shallow-copied. For any other object the well-known
+    attributes are read first (in a fixed order); afterwards every entry of
+    the instance ``__dict__`` that is not present yet is appended.
+    """
+    if isinstance(note, dict):
+        return dict(note)
+
+    known_attrs = (
+        "note_id", "id", "title", "type", "frontmatter", "meta",
+        "body", "text", "content", "path",
+    )
+    result: Dict[str, Any] = {
+        name: getattr(note, name) for name in known_attrs if hasattr(note, name)
+    }
+    # Instance attributes only fill gaps; they never override a known attribute.
+    for name, value in getattr(note, "__dict__", {}).items():
+        result.setdefault(name, value)
+    return result
+
+
+def _safe_get(d: Dict[str, Any], key: str, default: Any = None) -> Any:
+    """Look up ``key`` in ``d``; return ``default`` when ``d`` is not a dict."""
+    return d.get(key, default) if isinstance(d, dict) else default
+
+
+def _load_types_config(search_root: Optional[Union[str, Path]] = None,
+                       preloaded: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+    """Load the type defaults from config.yaml or types.yaml, if present.
+
+    Expected structure::
+
+        {
+          "version": "1.0",
+          "types": {
+            "concept": {"chunk_profile": "medium", "edge_defaults": [...], "retriever_weight": 0.33},
+            ...
+          }
+        }
+
+    Args:
+        search_root: Optional directory that is searched before the current
+            working directory.
+        preloaded: Already-loaded configuration; returned unchanged when it
+            is a dict whose ``"types"`` entry is a dict.
+
+    Returns:
+        The first usable configuration found, otherwise
+        ``{"version": "1.0", "types": {}}``.
+    """
+    fallback: Dict[str, Any] = {"version": "1.0", "types": {}}
+
+    # A "types" entry that is not a mapping cannot be used for lookups
+    # downstream, so it is treated like a missing configuration.
+    if isinstance(preloaded, dict) and isinstance(preloaded.get("types"), dict):
+        return preloaded
+
+    # Without PyYAML no file can be parsed; skip the file-system probing.
+    if yaml is None:
+        return fallback
+
+    roots: List[Path] = []
+    if search_root:
+        roots.append(Path(search_root))
+    # Always fall back to paths relative to the current working directory.
+    roots.append(Path.cwd())
+
+    for root in roots:
+        for candidate in (root / "config.yaml",
+                          root / "config" / "config.yaml",
+                          root / "config" / "types.yaml"):
+            try:
+                if not candidate.is_file():
+                    continue
+                data = yaml.safe_load(candidate.read_text(encoding="utf-8"))
+            except Exception:
+                # Best effort: an unreadable or malformed file must not abort
+                # payload creation, so move on to the next candidate.
+                continue
+            if isinstance(data, dict) and isinstance(data.get("types"), dict):
+                return data
+    return fallback

-def _frontmatter(obj: Any) -> Dict[str, Any]:
-    fm = _get(obj, "frontmatter", {}) or {}
-    return fm if isinstance(fm, dict) else {}

 def _coerce_float(val: Any, default: float) -> float:
    try:
@ -83,170 +102,147 @@ def _coerce_float(val: Any, default: float) -> float:
            return default
        if isinstance(val, (int, float)):
            return float(val)
-        if isinstance(val, str) and val.strip():
+        if isinstance(val, str):
            return float(val.strip())
    except Exception:
        pass
    return default

-def _normalize_chunk_profile(val: Any, fallback: str = "medium") -> str:
-    if not isinstance(val, str):
-        return fallback
-    v = val.strip().lower()
-    if v in {"short", "medium", "long"}:
-        return v
-    return fallback

-def _coerce_str_list(val: Any) -> List[str]:
-    if val is None:
+def _ensure_str_list(v: Any) -> List[str]:
+    """Normalise ``v`` to a list of strings.
+
+    ``None`` gives ``[]``; lists/tuples are stringified element-wise with
+    ``None`` entries dropped; a string is split on commas (blank parts are
+    discarded); any other value becomes a one-element list.
+    """
+    if v is None:
        return []
-    if isinstance(val, list):
-        out: List[str] = []
-        for x in val:
-            if isinstance(x, str):
-                out.append(x)
-            else:
-                out.append(str(x))
-        return out
-    if isinstance(val, str):
-        # allow comma-separated
-        return [x.strip() for x in val.split(",") if x.strip()]
+    if isinstance(v, (list, tuple)):
+        return [str(x) for x in v if x is not None]
+    if isinstance(v, str):
+        # Keep accepting comma-separated strings such as "references, related_to",
+        # which the replaced _coerce_str_list supported.
+        return [part.strip() for part in v.split(",") if part.strip()]
+    return [str(v)]
+
+
+def _resolve_type(note_d: Dict[str, Any]) -> str:
+    """Determine the note type.
+
+    Lookup order: frontmatter ``type``, top-level ``type``, ``meta["type"]``.
+    Falls back to ``"concept"`` when none of them yields a truthy value.
+    """
+    # NOTE(review): the value is returned as written (no strip/lower) —
+    # confirm that case-sensitive matching against config keys is intended.
+    frontmatter = note_d.get("frontmatter") or {}
+    note_type = _safe_get(frontmatter, "type")
+    if not note_type:
+        note_type = note_d.get("type")
+    if not note_type:
+        meta = note_d.get("meta")
+        if isinstance(meta, dict):
+            note_type = meta.get("type")
+    if not note_type:
+        return "concept"
+    return str(note_type)
+
+
+def _resolve_title(note_d: Dict[str, Any]) -> str:
+    """Return the note title: frontmatter ``title`` first, then top-level ``title``.
+
+    The result is always a string; ``""`` when no truthy title is found.
+    """
+    frontmatter = note_d.get("frontmatter") or {}
+    title = _safe_get(frontmatter, "title")
+    if not title:
+        title = note_d.get("title")
+    return str(title) if title else ""
+
+
+def _resolve_note_id(note_d: Dict[str, Any]) -> Optional[str]:
+    """Return the first non-empty string found under ``note_id`` or ``id``.
+
+    Non-string values are ignored; ``None`` is returned when neither key
+    holds a usable identifier.
+    """
+    candidates = (note_d.get(key) for key in ("note_id", "id"))
+    return next((c for c in candidates if isinstance(c, str) and c), None)
+
+
+def _resolve_body(note_d: Dict[str, Any]) -> str:
+    """Return the note body from ``body``, ``text`` or ``content``.
+
+    The first value that is a string with non-whitespace content wins and is
+    returned unmodified; otherwise the empty string is returned.
+    """
+    for field in ("body", "text", "content"):
+        candidate = note_d.get(field)
+        if not isinstance(candidate, str):
+            continue
+        if candidate.strip():
+            return candidate
+    return ""
+
+
+def _resolve_defaults_for_type(types_cfg: Dict[str, Any], typ: str) -> Dict[str, Any]:
+    """Return the defaults configured for note type ``typ``.
+
+    Malformed input never raises: if ``types_cfg``, its ``"types"`` entry or
+    the entry for ``typ`` is not a dict, an empty dict is returned.
+    """
+    if not isinstance(types_cfg, dict):
+        return {}
+    types_map = types_cfg.get("types")
+    # "types" may be a list or scalar in a hand-edited config; calling .get()
+    # on such a value would raise AttributeError.
+    if not isinstance(types_map, dict):
+        return {}
+    defaults = types_map.get(typ)
+    return defaults if isinstance(defaults, dict) else {}
+
+
+def _compute_retriever_weight(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> float:
+    """Resolve ``retriever_weight`` for a note.
+
+    Precedence: frontmatter > type defaults > environment variable
+    ``MINDNET_DEFAULT_RETRIEVER_WEIGHT`` > ``1.0``. A level counts as unset
+    when the key is missing *or* its value is ``None`` (e.g. an empty YAML
+    entry), so a blank frontmatter key does not hide the type default.
+    """
+    # 1) Frontmatter (anything that is not a mapping carries no overrides)
+    fm = note_d.get("frontmatter")
+    fm_value = fm.get("retriever_weight") if isinstance(fm, dict) else None
+    if fm_value is not None:
+        return _coerce_float(fm_value, 1.0)
+
+    # 2) Type defaults
+    type_value = _resolve_defaults_for_type(types_cfg, typ).get("retriever_weight")
+    if type_value is not None:
+        return _coerce_float(type_value, 1.0)
+
+    # 3) Environment
+    env_value = os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT")
+    if env_value:
+        return _coerce_float(env_value, 1.0)
+
+    # 4) Fallback
+    return 1.0
+
+
+def _compute_chunk_profile(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> str:
+    """Resolve ``chunk_profile`` for a note.
+
+    Precedence: frontmatter > type defaults > environment variable
+    ``MINDNET_DEFAULT_CHUNK_PROFILE`` > ``"medium"``. Values that are ``None``
+    or blank count as unset, so an empty YAML entry is never turned into the
+    literal profile name ``"None"``.
+    """
+    def _usable(value: Any) -> bool:
+        # None and whitespace-only values do not name a profile.
+        return value is not None and bool(str(value).strip())
+
+    fm = note_d.get("frontmatter")
+    fm_value = fm.get("chunk_profile") if isinstance(fm, dict) else None
+    if _usable(fm_value):
+        return str(fm_value)
+
+    type_value = _resolve_defaults_for_type(types_cfg, typ).get("chunk_profile")
+    if _usable(type_value):
+        return str(type_value)
+
+    env_value = os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE")
+    if env_value:
+        return str(env_value)
+    return "medium"
+
+
+def _compute_edge_defaults(note_d: Dict[str, Any], types_cfg: Dict[str, Any], typ: str) -> List[str]:
+    """Resolve ``edge_defaults`` for a note.
+
+    Precedence: frontmatter > type defaults > empty list. A ``None`` value
+    counts as unset and falls through to the next level, whereas an explicit
+    empty list in the frontmatter is honoured.
+    """
+    fm = note_d.get("frontmatter")
+    # Frontmatter that is not a mapping carries no usable overrides.
+    fm_value = fm.get("edge_defaults") if isinstance(fm, dict) else None
+    if fm_value is not None:
+        return _ensure_str_list(fm_value)
+    type_value = _resolve_defaults_for_type(types_cfg, typ).get("edge_defaults")
+    if type_value is not None:
+        return _ensure_str_list(type_value)
     return []

-def _safe_jsonable(value: Any) -> Any:
-    """Ensure value is JSON-serializable (no sets, Path, callables, etc.)."""
-    if isinstance(value, (str, int, float, bool)) or value is None:
-        return value
-    if isinstance(value, list):
-        return [_safe_jsonable(v) for v in value]
-    if isinstance(value, dict):
-        return {str(k): _safe_jsonable(v) for k, v in value.items()}
-    if isinstance(value, Path):
-        return str(value)
-    # Avoid sets and other iterables that are not JSON-serializable
-    try:
-        json.dumps(value)
-        return value
-    except Exception:
-        return str(value)

 # ------------------------------
-# Config loading
+# Öffentliche API
 # ------------------------------

-def _load_types_config(
-    explicit_config: Optional[Dict[str, Any]] = None,
-    search_root: Union[str, Path, None] = None,
-) -> Dict[str, Any]:
+def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]:
+    """Build the payload dict for a note.
+
+    Accepted additional kwargs:
+      - types_config: already-loaded config (dict containing "types")
+      - search_root / vault_root: directory in which config/* is searched
+
+    Positional *args are accepted but not used.
+
+    Returns:
+        A dict with ``type``, ``title``, ``retriever_weight``,
+        ``chunk_profile`` and ``edge_defaults``; ``note_id`` and
+        ``body_preview`` are added when available, and the remaining
+        frontmatter entries are passed through under ``fm_<key>``.
    """
-    Load types config from:
-      1) explicit_config (if provided)
-      2) {search_root}/config/config.yaml
-      3) {search_root}/config/types.yaml
-      4) ./config/config.yaml
-      5) ./config/types.yaml
-    Returns a dict with shape: {"types": {...}} (empty if none found).
-    """
-    if explicit_config and isinstance(explicit_config, dict):
-        if "types" in explicit_config and isinstance(explicit_config["types"], dict):
-            return explicit_config
+    note_d = _as_dict(note)

-    candidates: List[Path] = []
-    root = Path(search_root) if search_root else Path.cwd()
-    candidates.append(root / "config" / "config.yaml")
-    candidates.append(root / "config" / "types.yaml")
-    # fallback to CWD when search_root was different
-    candidates.append(Path.cwd() / "config" / "config.yaml")
-    candidates.append(Path.cwd() / "config" / "types.yaml")
+    # Locate the configuration
+    types_config = kwargs.get("types_config")
+    search_root = kwargs.get("search_root") or kwargs.get("vault_root")
+    types_cfg = _load_types_config(search_root, types_config)

-    data = {}
-    if yaml is None:
-        return {"types": {}}
+    # Resolve the fields
+    typ = _resolve_type(note_d)
+    title = _resolve_title(note_d)
+    note_id = _resolve_note_id(note_d)
+    body = _resolve_body(note_d)

-    for p in candidates:
-        try:
-            if p.exists():
-                with p.open("r", encoding="utf-8") as f:
-                    loaded = yaml.safe_load(f) or {}
-                    if isinstance(loaded, dict) and isinstance(loaded.get("types"), dict):
-                        data = {"types": loaded["types"]}
-                        break
-        except Exception:
-            continue
-    if not data:
-        data = {"types": {}}
-    return data
-
-def _type_defaults(note_type: str, cfg: Dict[str, Any]) -> Dict[str, Any]:
-    return (cfg.get("types") or {}).get(note_type, {}) if isinstance(cfg, dict) else {}
-
-# ------------------------------
-# Public API
-# ------------------------------
-
-def make_note_payload(
-    parsed_note: Any,
-    *,
-    config: Optional[Dict[str, Any]] = None,
-    search_root: Union[str, Path, None] = None,
-    **kwargs: Any,
-) -> Dict[str, Any]:
-    """
-    Build the payload for a NOTE. Tolerates extra kwargs (e.g., vault_root, prefix).
-    """
-    fm = _frontmatter(parsed_note)
-    note_type = fm.get("type") or _get(parsed_note, "type") or "concept"
-    note_type = str(note_type).strip().lower()
-
-    # Load config and resolve defaults
-    cfg = _load_types_config(config, search_root)
-    defaults = _type_defaults(note_type, cfg)
-
-    # retriever_weight: FM > type-defaults > ENV > 1.0
-    rw = fm.get("retriever_weight")
-    if rw is None:
-        rw = defaults.get("retriever_weight")
-    if rw is None:
-        env_rw = os.getenv("MINDNET_DEFAULT_RETRIEVER_WEIGHT")
-        rw = _coerce_float(env_rw, 1.0)
-    else:
-        rw = _coerce_float(rw, 1.0)
-
-    # chunk_profile: FM > type-defaults > ENV > medium
-    cp = fm.get("chunk_profile")
-    if cp is None:
-        cp = defaults.get("chunk_profile")
-    if cp is None:
-        cp = os.getenv("MINDNET_DEFAULT_CHUNK_PROFILE", "medium")
-    cp = _normalize_chunk_profile(cp, "medium")
-
-    # edge_defaults: FM > type-defaults > empty
-    edge_defs = fm.get("edge_defaults")
-    if edge_defs is None:
-        edge_defs = defaults.get("edge_defaults", [])
-    edge_defs = _coerce_str_list(edge_defs)
+    retriever_weight = _compute_retriever_weight(note_d, types_cfg, typ)
+    chunk_profile = _compute_chunk_profile(note_d, types_cfg, typ)
+    edge_defaults = _compute_edge_defaults(note_d, types_cfg, typ)

+    # Assemble the payload (JSON-compatible types only)
    payload: Dict[str, Any] = {
-        "id": _get(parsed_note, "id"),
-        "note_id": _get(parsed_note, "id"),
-        "title": _get(parsed_note, "title"),
-        "type": note_type,
-        "retriever_weight": float(rw),
-        "chunk_profile": cp,
-        "edge_defaults": edge_defs,
-        # Useful passthrough/meta (all made JSON-safe)
-        "path": _safe_jsonable(_get(parsed_note, "path")),
-        "source": _safe_jsonable(_get(parsed_note, "source")),
+        "type": typ,
+        "title": title,
+        "retriever_weight": float(retriever_weight),
+        "chunk_profile": str(chunk_profile),
+        "edge_defaults": edge_defaults,
    }
+    if note_id:
+        payload["note_id"] = note_id
+    if body:
+        payload["body_preview"] = body[:5000]  # preview only; the retriever uses the chunks

-    # Include raw frontmatter keys (stringify keys; make safe)
+    # Pass through the remaining frontmatter keys (no binary data/objects)
+    fm = note_d.get("frontmatter") or {}
    if isinstance(fm, dict):
        for k, v in fm.items():
-            # avoid overwriting normalized fields
-            if k in {"type", "retriever_weight", "chunk_profile", "edge_defaults"}:
+            if k in ("type", "retriever_weight", "chunk_profile", "edge_defaults"):
                continue
-            payload[f"fm_{k}"] = _safe_jsonable(v)
+            # Only let simple/useful types through
+            # NOTE(review): list/dict values are forwarded without inspecting
+            # their contents, and values of any other type are dropped
+            # silently — confirm this is acceptable for JSON serialisation.
+            if isinstance(v, (str, int, float, bool, list, dict)) or v is None:
+                payload[f"fm_{k}"] = v

-    # Remove None values to keep payload clean
-    payload = {k: v for k, v in payload.items() if v is not None}
    return payload