Dateien nach "app/core" hochladen

2025-11-16 18:56:33 +01:00 · 2025-11-16 18:56:33 +01:00 · bbc8f13944
commit bbc8f13944
parent f18a40d76c
1 changed files with 18 additions and 41 deletions
--- a/app/core/chunk_payload.py
+++ b/app/core/chunk_payload.py
@ -1,33 +1,12 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-app/core/chunk_payload.py  (Mindnet V2 — robust)
+app/core/chunk_payload.py  (Mindnet V2 — robust v2)
 Aufgabe
 -------
 Erzeugt Chunk-Payloads aus den vom Chunker gelieferten "Chunk"-Objekten.
 - Spiegelt `retriever_weight` und `chunk_profile` in **jedem** Chunk-Payload.
 - Werteauflösung: Frontmatter > types.yaml > Defaults.
 - Lädt `config/types.yaml` selbst, wenn `types_cfg` nicht übergeben wurde.
 Eingang
 -------
 - note: Dict mit mind. { frontmatter: {...}, id, type, title, path }
 - note_path: Pfad der Note (für Payload-Feld `path`)
 - chunks_from_chunker: Liste von Objekten mit Attributen/Feldern:
  id, note_id, index, text, window, neighbors_prev, neighbors_next
 - note_text: voller Text der Note (optional, kann leer sein)
 - types_cfg: optional; wenn None → config wird intern geladen
 - file_path: optional, für Debug/Tracing im Payload
 Ausgang (pro Chunk)
 -------------------
 - Pflichtfelder: note_id, chunk_id, index (0-basiert), ord (1-basiert), type, tags
 - Texte: text, window
 - Nachbarn: neighbors_prev, neighbors_next
 - Spiegelungen: retriever_weight, chunk_profile
 - Meta: source_path, path, section (leer), created/updated opt. aus Frontmatter
 Änderungen ggü. v1:
 - neighbors_prev / neighbors_next werden als **Array** persistiert ([], [id]).
 - retriever_weight / chunk_profile werden je Chunk aufgelöst (Frontmatter > types.yaml > Defaults).
 - Lädt config/types.yaml selbst, wenn types_cfg nicht übergeben wurde.
 """
 from __future__ import annotations
 from typing import Any, Dict, List, Optional
@ -60,10 +39,8 @@ def _load_types_local() -> dict:
        return {}
 def _effective_chunk_profile(note_type: str, fm: Dict[str, Any], reg: dict) -> Optional[str]:
    # Frontmatter zuerst
    if isinstance(fm.get("chunk_profile"), str):
        return fm.get("chunk_profile")
    # Registry
    types = reg.get("types") if isinstance(reg.get("types"), dict) else reg
    if isinstance(types, dict):
        v = types.get(note_type, {})
@ -74,12 +51,10 @@ def _effective_chunk_profile(note_type: str, fm: Dict[str, Any], reg: dict) -> O
    return None
 def _effective_retriever_weight(note_type: str, fm: Dict[str, Any], reg: dict) -> float:
    # Frontmatter zuerst
    if fm.get("retriever_weight") is not None:
        v = _as_float(fm.get("retriever_weight"))
        if v is not None:
            return float(v)
    # Registry-Pfade
    types = reg.get("types") if isinstance(reg.get("types"), dict) else reg
    candidates = [
        f"{note_type}.retriever_weight",
@ -91,16 +66,21 @@ def _effective_retriever_weight(note_type: str, fm: Dict[str, Any], reg: dict) -
        "global.retriever.weight",
    ]
    for path in candidates:
        # Wenn types == reg-root (flatten), erlauben sowohl "types.X" als auch "X"
        val = _deep_get(types, path) if "." in path else (types.get(path) if isinstance(types, dict) else None)
        if val is None and isinstance(reg, dict):
            # versuche absolute Pfade
            val = _deep_get(reg, f"types.{path}")
        v = _as_float(val)
        if v is not None:
            return float(v)
    return 1.0
 def _as_list(x):
    if x is None:
        return []
    if isinstance(x, list):
        return x
    return [x]
 def make_chunk_payloads(note: Dict[str, Any],
                        note_path: str,
                        chunks_from_chunker: List[Any],
@ -108,11 +88,10 @@ def make_chunk_payloads(note: Dict[str, Any],
                        note_text: str = "",
                        types_cfg: Optional[dict] = None,
                        file_path: Optional[str] = None) -> List[Dict[str, Any]]:
-    fm = (note or {}).get("frontmatter", {})
+    fm = (note or {}).get("frontmatter", {}) or {}
    note_type = fm.get("type") or note.get("type") or "concept"
    reg = types_cfg if isinstance(types_cfg, dict) else _load_types_local()
    # Effektive Werte bestimmen
    cp = _effective_chunk_profile(note_type, fm, reg)
    rw = _effective_retriever_weight(note_type, fm, reg)
@ -121,9 +100,8 @@ def make_chunk_payloads(note: Dict[str, Any],
        tags = [tags]
    out: List[Dict[str, Any]] = []
    for idx, ch in enumerate(chunks_from_chunker):
-        # Chunk-Grunddaten (Attribute oder Keys)
+        # Attribute oder Keys (Chunk-Objekt oder Dict)
        cid = getattr(ch, "id", None) or (ch.get("id") if isinstance(ch, dict) else None)
        nid = getattr(ch, "note_id", None) or (ch.get("note_id") if isinstance(ch, dict) else fm.get("id"))
        index = getattr(ch, "index", None) or (ch.get("index") if isinstance(ch, dict) else idx)
@ -141,8 +119,8 @@ def make_chunk_payloads(note: Dict[str, Any],
            "tags": tags,
            "text": text,
            "window": window,
-            "neighbors_prev": prev_id,
+            "neighbors_prev": _as_list(prev_id),
-            "neighbors_next": next_id,
+            "neighbors_next": _as_list(next_id),
            "section": getattr(ch, "section", None) or (ch.get("section") if isinstance(ch, dict) else ""),
            "path": note_path,
            "source_path": file_path or note_path,
@ -151,10 +129,9 @@ def make_chunk_payloads(note: Dict[str, Any],
        if cp is not None:
            pl["chunk_profile"] = cp
-        # Aufräumen: keine historischen Aliasfelder
+        # Aufräumen
        for alias in ("chunk_num", "Chunk_Number"):
-            if alias in pl:
+            pl.pop(alias, None)
                pl.pop(alias, None)
        out.append(pl)