chunk_payload soll nun auch die Overrides schreiben
This commit is contained in:
parent
a4272c17a9
commit
edbd8f0ca8
|
|
@ -3,9 +3,8 @@ FILE: app/core/chunk_payload.py
|
||||||
DESCRIPTION: Baut das JSON-Objekt für 'mindnet_chunks'.
|
DESCRIPTION: Baut das JSON-Objekt für 'mindnet_chunks'.
|
||||||
FEATURES:
|
FEATURES:
|
||||||
- Inkludiert Nachbarschafts-IDs (prev/next) und Titel.
|
- Inkludiert Nachbarschafts-IDs (prev/next) und Titel.
|
||||||
- FIX 1: Korrektes Auslesen von 'chunking_profile' (Frontmatter > Type > Default).
|
- FIX 3: Robuste Erkennung des Inputs (Frontmatter-Dict vs. Note-Objekt), damit Overrides ankommen.
|
||||||
- FIX 2: Korrektes Vererben von 'retriever_weight' (Frontmatter > Type > Default).
|
VERSION: 2.3.0
|
||||||
VERSION: 2.2.0
|
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: yaml, os
|
DEPENDENCIES: yaml, os
|
||||||
EXTERNAL_CONFIG: config/types.yaml
|
EXTERNAL_CONFIG: config/types.yaml
|
||||||
|
|
@ -92,31 +91,43 @@ def make_chunk_payloads(note: Dict[str, Any],
|
||||||
file_path: Optional[str] = None) -> List[Dict[str, Any]]:
|
file_path: Optional[str] = None) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Erstellt die Payloads für die Chunks.
|
Erstellt die Payloads für die Chunks.
|
||||||
|
|
||||||
|
Argument 'note' kann sein:
|
||||||
|
A) Ein komplexes Objekt/Dict mit Key "frontmatter" (Legacy / Tests)
|
||||||
|
B) Direkt das Frontmatter-Dictionary (Call aus ingestion.py)
|
||||||
"""
|
"""
|
||||||
fm = (note or {}).get("frontmatter", {}) or {}
|
|
||||||
|
# --- FIX 3: Intelligente Erkennung der Input-Daten ---
|
||||||
|
# Wir prüfen: Ist 'note' ein Container MIT 'frontmatter', oder IST es das 'frontmatter'?
|
||||||
|
if isinstance(note, dict) and "frontmatter" in note and isinstance(note["frontmatter"], dict):
|
||||||
|
# Fall A: Container (wir müssen auspacken)
|
||||||
|
fm = note["frontmatter"]
|
||||||
|
else:
|
||||||
|
# Fall B: Direktes Dict (so ruft ingestion.py es auf!)
|
||||||
|
fm = note or {}
|
||||||
|
|
||||||
note_type = fm.get("type") or note.get("type") or "concept"
|
note_type = fm.get("type") or note.get("type") or "concept"
|
||||||
|
|
||||||
# Title Extraction
|
# Title Extraction (Fallback Chain)
|
||||||
title = fm.get("title") or note.get("title") or fm.get("id") or "Untitled"
|
title = fm.get("title") or note.get("title") or fm.get("id") or "Untitled"
|
||||||
|
|
||||||
reg = types_cfg if isinstance(types_cfg, dict) else _load_types()
|
reg = types_cfg if isinstance(types_cfg, dict) else _load_types()
|
||||||
|
|
||||||
# --- Profil-Ermittlung (Fix aus v2.1.0) ---
|
# --- Profil-Ermittlung ---
|
||||||
|
# Da wir 'fm' jetzt korrekt haben, funktionieren diese lookups:
|
||||||
cp = fm.get("chunking_profile") or fm.get("chunk_profile")
|
cp = fm.get("chunking_profile") or fm.get("chunk_profile")
|
||||||
|
|
||||||
if not cp:
|
if not cp:
|
||||||
cp = _resolve_chunk_profile_from_config(note_type, reg)
|
cp = _resolve_chunk_profile_from_config(note_type, reg)
|
||||||
if not cp:
|
if not cp:
|
||||||
cp = "sliding_standard"
|
cp = "sliding_standard"
|
||||||
|
|
||||||
# --- FIX 2: Retriever Weight Ermittlung ---
|
# --- Retriever Weight Ermittlung ---
|
||||||
# 1. Frontmatter Override prüfen
|
|
||||||
rw = fm.get("retriever_weight")
|
rw = fm.get("retriever_weight")
|
||||||
|
|
||||||
# 2. Falls nicht im Frontmatter, aus Config laden
|
|
||||||
if rw is None:
|
if rw is None:
|
||||||
rw = _resolve_retriever_weight_from_config(note_type, reg)
|
rw = _resolve_retriever_weight_from_config(note_type, reg)
|
||||||
|
|
||||||
# 3. Sicherstellen, dass es ein Float ist
|
|
||||||
try:
|
try:
|
||||||
rw = float(rw)
|
rw = float(rw)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
@ -152,8 +163,8 @@ def make_chunk_payloads(note: Dict[str, Any],
|
||||||
"section": getattr(ch, "section", None) or (ch.get("section") if isinstance(ch, dict) else ""),
|
"section": getattr(ch, "section", None) or (ch.get("section") if isinstance(ch, dict) else ""),
|
||||||
"path": note_path,
|
"path": note_path,
|
||||||
"source_path": file_path or note_path,
|
"source_path": file_path or note_path,
|
||||||
"retriever_weight": float(rw), # Jetzt korrekt vererbt
|
"retriever_weight": float(rw),
|
||||||
"chunk_profile": cp,
|
"chunk_profile": cp, # Jetzt endlich mit dem Override-Wert!
|
||||||
}
|
}
|
||||||
|
|
||||||
# Cleanup
|
# Cleanup
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user