#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
app/core/chunk_payload.py (Mindnet V2 — types.yaml authoritative)

Builds one payload dictionary per chunk of a note.

- neighbors_prev / neighbors_next are lists ([] or [id]).
- retriever_weight / chunk_profile come from types.yaml (frontmatter is ignored).
- Fallbacks: defaults.* in types.yaml; otherwise 1.0 / "default".
- WP-11 update: injects 'title' into the chunk payload for the Discovery Service.
"""
from __future__ import annotations

import logging
import os
from typing import Any, Dict, List, Optional

try:
    import yaml
except ImportError:  # PyYAML is only required when types.yaml is read from disk
    yaml = None  # type: ignore[assignment]

logger = logging.getLogger(__name__)


def _env(n: str, d: Optional[str] = None) -> str:
    """Return environment variable *n*; fall back to *d* (or "") when unset."""
    v = os.getenv(n)
    return v if v is not None else (d or "")


def _load_types() -> dict:
    """Load the type registry from the file named by ``MINDNET_TYPES_FILE``.

    The path defaults to ``./config/types.yaml``. Loading is best effort:
    a missing or unreadable file, invalid YAML, a non-mapping document or a
    missing PyYAML installation all yield an empty dict, so callers fall back
    to the built-in defaults.
    """
    p = _env("MINDNET_TYPES_FILE", "./config/types.yaml")
    if yaml is None:
        logger.warning("PyYAML is not installed; cannot read %s, using defaults", p)
        return {}
    try:
        with open(p, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        # An absent registry is a normal "use defaults" situation.
        logger.debug("Types registry %s not found, using defaults", p)
        return {}
    except (OSError, ValueError, yaml.YAMLError) as exc:
        logger.warning("Could not load types registry %s: %s", p, exc)
        return {}
    return data if isinstance(data, dict) else {}


def _get_types_map(reg: dict) -> dict:
    """Return the per-type mapping of the registry.

    Uses ``reg["types"]`` when it is a dict; otherwise the registry itself is
    treated as the (flat) type mapping. Non-dict input yields {}.
    """
    if not isinstance(reg, dict):
        return {}
    types = reg.get("types")
    return types if isinstance(types, dict) else reg


def _get_defaults(reg: dict) -> dict:
    """Return the registry's ``defaults`` section, else ``global``, else {}."""
    if isinstance(reg, dict):
        for key in ("defaults", "global"):
            section = reg.get(key)
            if isinstance(section, dict):
                return section
    return {}


def _as_float(x: Any) -> Optional[float]:
    """Convert *x* to float; return None when it cannot be converted."""
    try:
        return float(x)
    except (TypeError, ValueError, OverflowError):
        return None


def _get_type_entry(note_type: Any, reg: dict) -> dict:
    """Return the registry entry for *note_type*, or {} when there is none."""
    try:
        entry = _get_types_map(reg).get(note_type)
    except TypeError:
        # Unhashable note_type (e.g. a list in the frontmatter) cannot be a key.
        return {}
    return entry if isinstance(entry, dict) else {}


def _resolve_chunk_profile(note_type: str, reg: dict) -> str:
    """Resolve ``chunk_profile``: type entry, then defaults, then "default"."""
    for source in (_get_type_entry(note_type, reg), _get_defaults(reg)):
        profile = source.get("chunk_profile")
        if isinstance(profile, str):
            return profile
    return "default"


def _resolve_retriever_weight(note_type: str, reg: dict) -> float:
    """Resolve ``retriever_weight``: type entry, then defaults, then 1.0.

    Values that cannot be converted to float are skipped.
    """
    for source in (_get_type_entry(note_type, reg), _get_defaults(reg)):
        weight = _as_float(source.get("retriever_weight"))
        if weight is not None:
            return weight
    return 1.0


def _as_list(x):
    """Wrap *x* in a list: None -> [], list -> unchanged, anything else -> [x]."""
    if x is None:
        return []
    if isinstance(x, list):
        return x
    return [x]


def _normalize_tags(raw: Any) -> List[Any]:
    """Return the frontmatter tags as a new list (a scalar becomes one tag)."""
    if not raw:
        return []
    if isinstance(raw, (list, tuple, set, frozenset)):
        return list(raw)
    return [raw]


def _chunk_field(ch: Any, name: str) -> Any:
    """Read *name* from a chunk given as an object (attribute) or dict (key).

    Returns None when the chunk provides neither.
    """
    value = getattr(ch, name, None)
    if value is None and isinstance(ch, dict):
        value = ch.get(name)
    return value


def make_chunk_payloads(note: Dict[str, Any],
                        note_path: str,
                        chunks_from_chunker: List[Any],
                        *,
                        note_text: str = "",
                        types_cfg: Optional[dict] = None,
                        file_path: Optional[str] = None) -> List[Dict[str, Any]]:
    """Build one payload dict per chunk of a note.

    Args:
        note: Parsed note; ``note["frontmatter"]`` supplies id, type, title and
            tags. ``None`` is treated like an empty note.
        note_path: Stored as ``path`` (and as ``source_path`` when *file_path*
            is not given).
        chunks_from_chunker: Chunks as objects (attribute access) or dicts
            (key access). ``None`` is treated like an empty list.
        note_text: Accepted for interface compatibility; not used here.
        types_cfg: Type registry to use; when it is not a dict, the registry is
            loaded from the types.yaml file instead.
        file_path: Optional value for ``source_path``.

    Returns:
        A list of payload dicts in chunk order. Fields missing on a chunk fall
        back identically for objects and dicts: ``note_id`` -> frontmatter id,
        ``index`` -> position in *chunks_from_chunker*, ``text`` -> "",
        ``window`` -> text, ``section`` -> "", neighbors -> [].
    """
    note = note or {}
    fm = note.get("frontmatter") or {}
    note_type = fm.get("type") or note.get("type") or "concept"

    # WP-11: title for the Discovery Service, taken from the frontmatter with
    # the note id / "Untitled" as fallbacks.
    title = fm.get("title") or note.get("title") or fm.get("id") or "Untitled"

    # types.yaml is authoritative for chunk_profile and retriever_weight.
    reg = types_cfg if isinstance(types_cfg, dict) else _load_types()
    cp = _resolve_chunk_profile(note_type, reg)
    rw = _resolve_retriever_weight(note_type, reg)

    tags = _normalize_tags(fm.get("tags"))

    out: List[Dict[str, Any]] = []
    for idx, ch in enumerate(chunks_from_chunker or []):
        # 0 is a valid index, so only a missing value falls back to the position.
        raw_index = _chunk_field(ch, "index")
        index = idx if raw_index is None else int(raw_index)

        text = _chunk_field(ch, "text") or ""

        out.append({
            "note_id": _chunk_field(ch, "note_id") or fm.get("id"),
            "chunk_id": _chunk_field(ch, "id"),
            "title": title,
            "index": index,
            "ord": index + 1,
            "type": note_type,
            # Each payload gets its own list so later edits do not leak across chunks.
            "tags": list(tags),
            "text": text,
            "window": _chunk_field(ch, "window") or text,
            "neighbors_prev": _as_list(_chunk_field(ch, "neighbors_prev") or None),
            "neighbors_next": _as_list(_chunk_field(ch, "neighbors_next") or None),
            "section": _chunk_field(ch, "section") or "",
            "path": note_path,
            "source_path": file_path or note_path,
            "retriever_weight": rw,
            "chunk_profile": cp,
        })

    return out