scriptAudit #11
|
|
@@ -3,9 +3,8 @@ FILE: app/core/chunk_payload.py
|
|||
DESCRIPTION: Baut das JSON-Objekt für 'mindnet_chunks'.
|
||||
FEATURES:
|
||||
- Inkludiert Nachbarschafts-IDs (prev/next) und Titel.
|
||||
- FIX 1: Korrektes Auslesen von 'chunking_profile' (Frontmatter > Type > Default).
|
||||
- FIX 2: Korrektes Vererben von 'retriever_weight' (Frontmatter > Type > Default).
|
||||
VERSION: 2.2.0
|
||||
- FIX 3: Robuste Erkennung des Inputs (Frontmatter-Dict vs. Note-Objekt), damit Overrides ankommen.
|
||||
VERSION: 2.3.0
|
||||
STATUS: Active
|
||||
DEPENDENCIES: yaml, os
|
||||
EXTERNAL_CONFIG: config/types.yaml
|
||||
|
|
@@ -92,31 +91,43 @@ def make_chunk_payloads(note: Dict[str, Any],
|
|||
file_path: Optional[str] = None) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Erstellt die Payloads für die Chunks.
|
||||
|
||||
Argument 'note' kann sein:
|
||||
A) Ein komplexes Objekt/Dict mit Key "frontmatter" (Legacy / Tests)
|
||||
B) Direkt das Frontmatter-Dictionary (Call aus ingestion.py)
|
||||
"""
|
||||
fm = (note or {}).get("frontmatter", {}) or {}
|
||||
|
||||
# --- FIX 3: Intelligente Erkennung der Input-Daten ---
|
||||
# Wir prüfen: Ist 'note' ein Container MIT 'frontmatter', oder IST es das 'frontmatter'?
|
||||
if isinstance(note, dict) and "frontmatter" in note and isinstance(note["frontmatter"], dict):
|
||||
# Fall A: Container (wir müssen auspacken)
|
||||
fm = note["frontmatter"]
|
||||
else:
|
||||
# Fall B: Direktes Dict (so ruft ingestion.py es auf!)
|
||||
fm = note or {}
|
||||
|
||||
note_type = fm.get("type") or note.get("type") or "concept"
|
||||
|
||||
# Title Extraction
|
||||
# Title Extraction (Fallback Chain)
|
||||
title = fm.get("title") or note.get("title") or fm.get("id") or "Untitled"
|
||||
|
||||
reg = types_cfg if isinstance(types_cfg, dict) else _load_types()
|
||||
|
||||
# --- Profil-Ermittlung (Fix aus v2.1.0) ---
|
||||
# --- Profil-Ermittlung ---
|
||||
# Da wir 'fm' jetzt korrekt haben, funktionieren diese lookups:
|
||||
cp = fm.get("chunking_profile") or fm.get("chunk_profile")
|
||||
|
||||
if not cp:
|
||||
cp = _resolve_chunk_profile_from_config(note_type, reg)
|
||||
if not cp:
|
||||
cp = "sliding_standard"
|
||||
|
||||
# --- FIX 2: Retriever Weight Ermittlung ---
|
||||
# 1. Frontmatter Override prüfen
|
||||
# --- Retriever Weight Ermittlung ---
|
||||
rw = fm.get("retriever_weight")
|
||||
|
||||
# 2. Falls nicht im Frontmatter, aus Config laden
|
||||
if rw is None:
|
||||
rw = _resolve_retriever_weight_from_config(note_type, reg)
|
||||
|
||||
# 3. Sicherstellen, dass es ein Float ist
|
||||
try:
|
||||
rw = float(rw)
|
||||
except Exception:
|
||||
|
|
@@ -152,8 +163,8 @@ def make_chunk_payloads(note: Dict[str, Any],
|
|||
"section": getattr(ch, "section", None) or (ch.get("section") if isinstance(ch, dict) else ""),
|
||||
"path": note_path,
|
||||
"source_path": file_path or note_path,
|
||||
"retriever_weight": float(rw), # Jetzt korrekt vererbt
|
||||
"chunk_profile": cp,
|
||||
"retriever_weight": float(rw),
|
||||
"chunk_profile": cp, # Jetzt endlich mit dem Override-Wert!
|
||||
}
|
||||
|
||||
# Cleanup
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user