# mindnet/app/core/ingestion/ingestion_chunk_payload.py

"""
FILE: app/core/ingestion/ingestion_chunk_payload.py
DESCRIPTION: Baut das JSON-Objekt für 'mindnet_chunks'.
             Fix v2.4.1: Behebt AttributeError bei Zugriff auf Chunk-Objekte.
VERSION: 2.4.1
STATUS: Active
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional

def _as_list(x):
    if x is None: return []
    return x if isinstance(x, list) else [x]

def make_chunk_payloads(note: Dict[str, Any],
                        note_path: str,
                        chunks_from_chunker: List[Any],
                        **kwargs) -> List[Dict[str, Any]]:
    """
    Build the payload dictionaries for the chunks of one note.

    Each chunk may be either a dict or an object exposing the same fields as
    attributes; both shapes are read through ``_chunk_field``.

    Args:
        note: Either a dict with a ``"frontmatter"`` key, or the frontmatter
            mapping itself. ``None`` / empty is treated as no frontmatter.
        note_path: Stored as ``"path"`` and used as the fallback for
            ``"source_path"``.
        chunks_from_chunker: Chunks (dicts or objects) in document order.
        **kwargs: Only ``file_path`` is read; when truthy it overrides
            ``note_path`` for ``"source_path"``.

    Returns:
        One payload dict per chunk, in input order.
    """
    # Frontmatter extraction. A present-but-empty frontmatter (None) is
    # treated like an empty mapping instead of crashing on ``.get``.
    if isinstance(note, dict) and "frontmatter" in note:
        fm = note["frontmatter"] or {}
    else:
        fm = note or {}

    note_type = fm.get("type") or "concept"
    title = fm.get("title") or fm.get("id") or "Untitled"
    tags = _as_list(fm.get("tags") or [])
    cp = fm.get("chunking_profile") or fm.get("chunk_profile") or "sliding_standard"

    # An explicit ``retriever_weight: null`` must not reach float(); only a
    # missing/None value falls back to 1.0, so an explicit 0 is preserved.
    raw_weight = fm.get("retriever_weight")
    rw = 1.0 if raw_weight is None else float(raw_weight)

    out: List[Dict[str, Any]] = []
    for idx, ch in enumerate(chunks_from_chunker):
        cid = _chunk_field(ch, "id")
        nid = _chunk_field(ch, "note_id")

        # Fall back to the enumeration position when the chunk has no index
        # or carries an explicit None (int(None) would raise).
        index = _chunk_field(ch, "index", default=idx)
        if index is None:
            index = idx
        index = int(index)

        text = _chunk_field(ch, "text", default="")
        window = _chunk_field(ch, "window", default=text)

        prev_id = _chunk_field(ch, "neighbors_prev")
        next_id = _chunk_field(ch, "neighbors_next")

        # Accept both spellings for both chunk shapes. The name each shape
        # used historically is tried first, so existing inputs yield the
        # same value as before.
        if isinstance(ch, dict):
            section_names = ("section", "section_title")
        else:
            section_names = ("section_title", "section")
        section = _chunk_field(ch, *section_names, default="")

        pl: Dict[str, Any] = {
            "note_id": nid or fm.get("id"),
            "chunk_id": cid,
            "title": title,
            "index": index,
            "ord": index + 1,
            "type": note_type,
            # Per-payload copy so payloads do not alias one another or the
            # frontmatter's own tag list.
            "tags": list(tags),
            "text": text,
            "window": window,
            "neighbors_prev": _as_list(prev_id),
            "neighbors_next": _as_list(next_id),
            "section": section,
            "path": note_path,
            "source_path": kwargs.get("file_path") or note_path,
            "retriever_weight": rw,
            "chunk_profile": cp,
        }
        out.append(pl)

    return out


def _chunk_field(ch: Any, *names: str, default: Any = None) -> Any:
    """Return the first of *names* present on *ch*, else *default*.

    *ch* may be a dict (key lookup) or any other object (attribute lookup).
    A field that is present with value ``None`` is returned as ``None``;
    *default* is used only when none of the names exist.
    """
    is_dict = isinstance(ch, dict)
    for name in names:
        if is_dict:
            if name in ch:
                return ch[name]
        elif hasattr(ch, name):
            return getattr(ch, name)
    return default