diff --git a/app/core/chunk_payload.py b/app/core/chunk_payload.py index 375da04..0c00402 100644 --- a/app/core/chunk_payload.py +++ b/app/core/chunk_payload.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- """ Modul: app/core/chunk_payload.py -Version: 2.2.0 -Datum: 2025-10-06 +Version: 2.2.1 +Datum: 2025-11-08 Zweck ----- @@ -17,6 +17,7 @@ Felder (beibehalten aus 2.0.1): - start, end (Offsets im gesamten Body) - overlap_left, overlap_right - token_count?, section_title?, section_path?, type?, title?, tags? + - retriever_weight? (NEU: aus Frontmatter übernommen, numerisch gespeichert) Kompatibilität: - 'id' == 'chunk_id' als Alias @@ -27,18 +28,8 @@ Lizenz: MIT (projektintern) """ from __future__ import annotations - -def _overlap_from_frontmatter(frontmatter: Dict[str, Any], fallback: Tuple[int,int]) -> Tuple[int,int]: - prof = str(frontmatter.get("chunk_profile") or "").strip().lower() - if prof: - try: - return profile_overlap(prof) - except Exception: - return fallback - return fallback - - from typing import Any, Dict, Iterable, List, Optional, Tuple, Union + from app.core.type_registry import profile_overlap try: @@ -232,6 +223,15 @@ def make_chunk_payloads( # 5) Payload-Dicts payloads: List[Dict[str, Any]] = [] + # retriever_weight aus Frontmatter einlesen (einmalig auflösen) + _rw_val: Optional[float] = None + if isinstance(frontmatter, dict) and frontmatter.get("retriever_weight") is not None: + try: + _rw_val = float(frontmatter.get("retriever_weight")) + except Exception: + # Wenn keine Zahl, als None ignorieren (Qdrant-Index verlangt numerisch/Null) + _rw_val = None + for i, (win, seg) in enumerate(zip(windows_final, segments)): chunk_id = ids_in[i] or f"{note_id}#{i+1}" pl: Dict[str, Any] = { @@ -255,13 +255,6 @@ def make_chunk_payloads( pl["title"] = note_title if note_tags is not None: pl["tags"] = note_tags - # propagate optional retriever_weight from frontmatter - try: - _rw = frontmatter.get("retriever_weight") if isinstance(frontmatter, dict) else None - if _rw is not None: - pl["retriever_weight"] = float(_rw) - except Exception: - pass if token_counts[i] is not None: pl["token_count"] = int(token_counts[i]) if section_titles[i] is not None: @@ -269,6 +262,11 @@ def make_chunk_payloads( if section_paths[i] is not None: sp = str(section_paths[i]).replace("\\", "/") pl["section_path"] = sp if sp else "/" + + # ---> HINZUGEFÜGT: retriever_weight pro Chunk aus Frontmatter numerisch mitschreiben + if _rw_val is not None: + pl["retriever_weight"] = _rw_val + payloads.append(pl) return payloads @@ -276,7 +274,7 @@ def make_chunk_payloads( # __main__ Demo (optional) if __name__ == "__main__": # pragma: no cover - fm = {"id": "demo", "title": "Demo", "type": "concept"} + fm = {"id": "demo", "title": "Demo", "type": "concept", "retriever_weight": 0.85} # Beispiel ohne echte Fenster → erzeugt synthetische Overlaps chunks = [ {"id": "demo#1", "text": "Alpha Beta Gamma"}, @@ -287,4 +285,4 @@ if __name__ == "__main__": # pragma: no cover from pprint import pprint pprint(pls) recon = "".join(p["text"] for p in pls) - print("RECON:", recon) \ No newline at end of file + print("RECON:", recon) diff --git a/app/core/note_payload.py b/app/core/note_payload.py index f60db87..9b938f1 100644 --- a/app/core/note_payload.py +++ b/app/core/note_payload.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # Modul: app/core/note_payload.py -# Version: 1.7.0 -# Datum: 2025-09-09 +# Version: 1.7.1 +# Datum: 2025-11-08 from __future__ import annotations @@ -193,6 +193,14 @@ def make_note_payload( "references": refs, } + # ---> HINZUGEFÜGT: retriever_weight auf Note-Ebene – numerisch, falls möglich + if fm.get("retriever_weight") is not None: + try: + payload["retriever_weight"] = float(fm.get("retriever_weight")) + except Exception: + # Falls kein Float (z. B. "high"), als Rohwert ablegen (kompatibel) + payload["retriever_weight"] = fm.get("retriever_weight") + for k in ("area", "project", "source", "lang", "slug", "aliases"): if k in fm: payload[k] = fm[k]