Dateien nach "app/core" hochladen

2025-11-11 17:01:19 +01:00 · 2025-11-11 17:01:19 +01:00 · 2a1c62aeed
commit 2a1c62aeed
parent 948d6f4b47
1 changed file with 21 additions and 14 deletions
--- a/app/core/chunk_payload.py
+++ b/app/core/chunk_payload.py
@@ -3,16 +3,14 @@
 """
 app/core/chunk_payload.py  — Mindnet V2 (compat)

-Ziel:
+Ziele:
 - Bewahrt bestehendes Verhalten (index, chunk_profile, retriever_weight, etc.)
-- Ergänzt optionale Denormalisierung: `tags` aus der Note‑FM auch auf Chunks
+- Denormalisiert optional `tags` aus der Note‑FM auf Chunks
 - Fügt Aliase für die Chunk‑Nummer hinzu: `ord` (v2‑Schema), `chunk_num`, `Chunk_Nummer`
-  (Letztere ist rein UI/Filter-freundlich für deine bestehenden Indizes mit dt. Keys.)
+- **Kompatibilität:** akzeptiert sowohl `path_arg` (positional) als auch `file_path` (keyword)

 Hinweis:
-- `edge_defaults` gehören konzeptionell zur *Note* (Regelmenge des Quelltyps)
-  und werden nicht pro Chunk repliziert. Falls gewünscht, kann das optional
-  ergänzt werden – aktuell **nicht** gesetzt, siehe Design-Kommentar im PR.
+- `edge_defaults` sind Note‑Regeln (Typ) und werden nicht pro Chunk gespiegelt.
 """
 from __future__ import annotations

@@ -53,23 +51,31 @@ def _load_types_config(types_cfg_explicit: Optional[dict] = None) -> dict:
    """Types-Registry *optional* einspeisen (bereits geparst), sonst lazy-laden vermeiden."""
    return types_cfg_explicit or {}

-def _text_from_note(note: Dict[str, Any], path: str) -> str:
+def _text_from_note(note: Dict[str, Any]) -> str:
    # Erwartete Inputs (siehe parser.py / import_markdown.py):
    # note["body"] oder note["text"]; Fallback leerer String
    return note.get("body") or note.get("text") or ""

 def _iter_chunks(note: Dict[str, Any], chunk_profile: str, fulltext: str) -> List[Dict[str, Any]]:
-    """Nutze bestehenden assemble_chunks(note_id, body, type) Pfad, keine Doppel-Logik hier."""
+    """Nutze bestehenden assemble_chunks(note_id, body, type)."""
    note_id = note.get("id") or (note.get("frontmatter") or {}).get("id")
    ntype = (note.get("frontmatter") or {}).get("type") or note.get("type") or "note"
    # assemble_chunks liefert Liste von Dicts mit mindestens {"index","text"} (v1)
    return assemble_chunks(note_id, fulltext, ntype)

-def make_chunk_payloads(note: Any, path_arg: Optional[str], chunks_from_chunker: Optional[List[Dict[str, Any]]] = None, *, note_text: Optional[str] = None, types_cfg: Optional[dict] = None) -> List[Dict[str, Any]]:
+def make_chunk_payloads(
+    note: Any,
+    path_arg: Optional[str] = None,
+    chunks_from_chunker: Optional[List[Dict[str, Any]]] = None,
+    *,
+    file_path: Optional[str] = None,
+    note_text: Optional[str] = None,
+    types_cfg: Optional[dict] = None,
+) -> List[Dict[str, Any]]:
    """
    Erzeugt Chunk-Payloads. Erwartet:
      - `note`: Normalisierte Note-Struktur (inkl. frontmatter)
-      - `path_arg`: Pfad der Note
+      - `path_arg` oder `file_path`: Pfad der Note
      - `chunks_from_chunker`: optional: Ergebnis von assemble_chunks (sonst wird intern erzeugt)

    Rückgabe: Liste aus Payload-Dicts, jedes mit mind.:
@@ -95,7 +101,8 @@ def make_chunk_payloads(note: Any, path_arg: Optional[str], chunks_from_chunker:

    note_id = n.get("note_id") or n.get("id") or fm.get("id")
    title = n.get("title") or fm.get("title") or ""
-    path = n.get("path") or path_arg
+    # Pfad-Auflösung: Priorität file_path > note['path'] > path_arg
+    path = file_path or n.get("path") or path_arg
    if isinstance(path, pathlib.Path):
        path = str(path)
    path = path or ""  # garantiert vorhanden
@@ -105,7 +112,7 @@ def make_chunk_payloads(note: Any, path_arg: Optional[str], chunks_from_chunker:
    tags_list = _ensure_list(tags) if tags else []

    # Quelltext
-    fulltext = note_text if isinstance(note_text, str) else _text_from_note(n, path)
+    fulltext = note_text if isinstance(note_text, str) else _text_from_note(n)

    # Chunks besorgen
    chunks = chunks_from_chunker if isinstance(chunks_from_chunker, list) else _iter_chunks(n, chunk_profile, fulltext)
@@ -127,11 +134,11 @@ def make_chunk_payloads(note: Any, path_arg: Optional[str], chunks_from_chunker:
            "index": idx,
            "ord": idx,               # Alias für v2‑Schema
            "chunk_num": idx,         # neutraler Alias
-            "Chunk_Nummer": idx,      # deutschsprachiger Alias (zur Filter‑Kompatibilität)
+            "Chunk_Nummer": idx,      # deutschsprachiger Alias
            "title": title,
            "type": note_type,
            "path": path,             # garantiert vorhanden
-            "text": text,             # nie leer, sonst kein Chunk
+            "text": text,             # nie leer
            "retriever_weight": retriever_weight,
            "chunk_profile": chunk_profile,
        }