From c8cdf218f28336e18190f79e3eb959e6fb8620b0 Mon Sep 17 00:00:00 2001
From: Lars
Date: Tue, 16 Dec 2025 15:52:50 +0100
Subject: [PATCH] bug fix

---
Reviewer notes (text between the '---' line and the diff is ignored by git am):

* The line structure of this patch was restored from a whitespace-collapsed
  copy. Indentation, blank context lines and whitespace-only +/- lines are
  inferred from the hunk headers and the diffstat (33 insertions, 26
  deletions). Trailing whitespace could not be recovered (for example the
  DESCRIPTION line pair differs only in whitespace), so apply with
  `git apply --ignore-whitespace` if a hunk is rejected.
* Behaviour change: make_chunk_payloads() now reads 'retriever_weight' from
  the note frontmatter first and only falls back to the config (type level,
  then defaults level) when the key is absent or null.
* A frontmatter value that float() rejects yields 1.0; it does not fall back
  to the config value. Please confirm that this is intended.
* _resolve_retriever_weight is renamed to
  _resolve_retriever_weight_from_config. Callers outside this diff are not
  visible here and should be checked.

 app/core/chunk_payload.py | 59 ++++++++++++++++++++++-----------------
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/app/core/chunk_payload.py b/app/core/chunk_payload.py
index d864b82..3936ab7 100644
--- a/app/core/chunk_payload.py
+++ b/app/core/chunk_payload.py
@@ -1,10 +1,11 @@
 """
 FILE: app/core/chunk_payload.py
-DESCRIPTION: Baut das JSON-Objekt für 'mindnet_chunks'.
+DESCRIPTION: Baut das JSON-Objekt für 'mindnet_chunks'.
 FEATURES:
     - Inkludiert Nachbarschafts-IDs (prev/next) und Titel.
-    - FIX: Korrektes Auslesen von 'chunking_profile' (Frontmatter > Type > Default).
-VERSION: 2.1.0
+    - FIX 1: Korrektes Auslesen von 'chunking_profile' (Frontmatter > Type > Default).
+    - FIX 2: Korrektes Vererben von 'retriever_weight' (Frontmatter > Type > Default).
+VERSION: 2.2.0
 STATUS: Active
 DEPENDENCIES: yaml, os
 EXTERNAL_CONFIG: config/types.yaml
@@ -42,40 +43,39 @@ def _as_float(x: Any):
     except Exception: return None
 
 def _resolve_chunk_profile_from_config(note_type: str, reg: dict) -> Optional[str]:
-    """
-    Liest das Profil aus der Config (Type > Default).
-    Prüft 'chunking_profile' UND 'chunk_profile'.
-    """
     # 1. Type Level
     types = _get_types_map(reg)
     if isinstance(types, dict):
         t = types.get(note_type, {})
         if isinstance(t, dict):
-            # Prüfe beide Schreibweisen
             cp = t.get("chunking_profile") or t.get("chunk_profile")
-            if isinstance(cp, str) and cp:
-                return cp
-
+            if isinstance(cp, str) and cp: return cp
     # 2. Defaults Level
     defs = _get_defaults(reg)
     if isinstance(defs, dict):
         cp = defs.get("chunking_profile") or defs.get("chunk_profile")
-        if isinstance(cp, str) and cp:
-            return cp
-
+        if isinstance(cp, str) and cp: return cp
     return None
 
-def _resolve_retriever_weight(note_type: str, reg: dict) -> float:
+def _resolve_retriever_weight_from_config(note_type: str, reg: dict) -> float:
+    """
+    Liest Weight nur aus Config (Type > Default).
+    Wird aufgerufen, wenn im Frontmatter nichts steht.
+    """
+    # 1. Type Level
     types = _get_types_map(reg)
     if isinstance(types, dict):
         t = types.get(note_type, {})
         if isinstance(t, dict) and (t.get("retriever_weight") is not None):
             v = _as_float(t.get("retriever_weight"))
             if v is not None: return float(v)
+
+    # 2. Defaults Level
     defs = _get_defaults(reg)
     if isinstance(defs, dict) and (defs.get("retriever_weight") is not None):
         v = _as_float(defs.get("retriever_weight"))
         if v is not None: return float(v)
+
     return 1.0
 
 def _as_list(x):
@@ -101,19 +101,26 @@ def make_chunk_payloads(note: Dict[str, Any],
 
     reg = types_cfg if isinstance(types_cfg, dict) else _load_types()
 
-    # --- FIX: Profil-Ermittlung ---
-    # 1. Frontmatter (Override)
+    # --- Profil-Ermittlung (Fix aus v2.1.0) ---
     cp = fm.get("chunking_profile") or fm.get("chunk_profile")
-
-    # 2. Config (Type / Default)
     if not cp:
         cp = _resolve_chunk_profile_from_config(note_type, reg)
-
-    # 3. Hard Fallback
     if not cp:
-        cp = "sliding_standard" # Statt "default"
+        cp = "sliding_standard"
 
-    rw = _resolve_retriever_weight(note_type, reg)
+    # --- FIX 2: Retriever Weight Ermittlung ---
+    # 1. Frontmatter Override prüfen
+    rw = fm.get("retriever_weight")
+
+    # 2. Falls nicht im Frontmatter, aus Config laden
+    if rw is None:
+        rw = _resolve_retriever_weight_from_config(note_type, reg)
+
+    # 3. Sicherstellen, dass es ein Float ist
+    try:
+        rw = float(rw)
+    except Exception:
+        rw = 1.0
 
     tags = fm.get("tags") or []
     if isinstance(tags, str):
@@ -121,7 +128,7 @@ def make_chunk_payloads(note: Dict[str, Any],
 
     out: List[Dict[str, Any]] = []
     for idx, ch in enumerate(chunks_from_chunker):
-        # Attribute sicher extrahieren
+        # Attribute extrahieren
         cid = getattr(ch, "id", None) or (ch.get("id") if isinstance(ch, dict) else None)
         nid = getattr(ch, "note_id", None) or (ch.get("note_id") if isinstance(ch, dict) else fm.get("id"))
         index = getattr(ch, "index", None) or (ch.get("index") if isinstance(ch, dict) else idx)
@@ -145,8 +152,8 @@ def make_chunk_payloads(note: Dict[str, Any],
             "section": getattr(ch, "section", None) or (ch.get("section") if isinstance(ch, dict) else ""),
             "path": note_path,
             "source_path": file_path or note_path,
-            "retriever_weight": float(rw),
-            "chunk_profile": cp, # Jetzt korrekt
+            "retriever_weight": float(rw), # Jetzt korrekt vererbt
+            "chunk_profile": cp,
         }
 
         # Cleanup