Dateien nach "app/core" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s

This commit is contained in:
Lars 2025-11-08 21:11:03 +01:00
parent e451ea64ae
commit 52eae52061
2 changed files with 30 additions and 24 deletions

View File

@ -2,8 +2,8 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Modul: app/core/chunk_payload.py Modul: app/core/chunk_payload.py
Version: 2.2.0 Version: 2.2.1
Datum: 2025-10-06 Datum: 2025-11-08
Zweck Zweck
----- -----
@ -17,6 +17,7 @@ Felder (beibehalten aus 2.0.1):
- start, end (Offsets im gesamten Body) - start, end (Offsets im gesamten Body)
- overlap_left, overlap_right - overlap_left, overlap_right
- token_count?, section_title?, section_path?, type?, title?, tags? - token_count?, section_title?, section_path?, type?, title?, tags?
- retriever_weight? (NEU: aus Frontmatter übernommen, numerisch gespeichert)
Kompatibilität: Kompatibilität:
- 'id' == 'chunk_id' als Alias - 'id' == 'chunk_id' als Alias
@ -27,18 +28,8 @@ Lizenz: MIT (projektintern)
""" """
from __future__ import annotations from __future__ import annotations
def _overlap_from_frontmatter(frontmatter: Dict[str, Any], fallback: Tuple[int,int]) -> Tuple[int,int]:
prof = str(frontmatter.get("chunk_profile") or "").strip().lower()
if prof:
try:
return profile_overlap(prof)
except Exception:
return fallback
return fallback
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
from app.core.type_registry import profile_overlap from app.core.type_registry import profile_overlap
try: try:
@ -232,6 +223,15 @@ def make_chunk_payloads(
# 5) Payload-Dicts # 5) Payload-Dicts
payloads: List[Dict[str, Any]] = [] payloads: List[Dict[str, Any]] = []
# retriever_weight aus Frontmatter einlesen (einmalig auflösen)
_rw_val: Optional[float] = None
if isinstance(frontmatter, dict) and frontmatter.get("retriever_weight") is not None:
try:
_rw_val = float(frontmatter.get("retriever_weight"))
except Exception:
# Wenn keine Zahl, als None ignorieren (Qdrant-Index verlangt numerisch/Null)
_rw_val = None
for i, (win, seg) in enumerate(zip(windows_final, segments)): for i, (win, seg) in enumerate(zip(windows_final, segments)):
chunk_id = ids_in[i] or f"{note_id}#{i+1}" chunk_id = ids_in[i] or f"{note_id}#{i+1}"
pl: Dict[str, Any] = { pl: Dict[str, Any] = {
@ -255,13 +255,6 @@ def make_chunk_payloads(
pl["title"] = note_title pl["title"] = note_title
if note_tags is not None: if note_tags is not None:
pl["tags"] = note_tags pl["tags"] = note_tags
# propagate optional retriever_weight from frontmatter
try:
_rw = frontmatter.get("retriever_weight") if isinstance(frontmatter, dict) else None
if _rw is not None:
pl["retriever_weight"] = float(_rw)
except Exception:
pass
if token_counts[i] is not None: if token_counts[i] is not None:
pl["token_count"] = int(token_counts[i]) pl["token_count"] = int(token_counts[i])
if section_titles[i] is not None: if section_titles[i] is not None:
@ -269,6 +262,11 @@ def make_chunk_payloads(
if section_paths[i] is not None: if section_paths[i] is not None:
sp = str(section_paths[i]).replace("\\", "/") sp = str(section_paths[i]).replace("\\", "/")
pl["section_path"] = sp if sp else "/" pl["section_path"] = sp if sp else "/"
# ---> HINZUGEFÜGT: retriever_weight pro Chunk aus Frontmatter numerisch mitschreiben
if _rw_val is not None:
pl["retriever_weight"] = _rw_val
payloads.append(pl) payloads.append(pl)
return payloads return payloads
@ -276,7 +274,7 @@ def make_chunk_payloads(
# __main__ Demo (optional) # __main__ Demo (optional)
if __name__ == "__main__": # pragma: no cover if __name__ == "__main__": # pragma: no cover
fm = {"id": "demo", "title": "Demo", "type": "concept"} fm = {"id": "demo", "title": "Demo", "type": "concept", "retriever_weight": 0.85}
# Beispiel ohne echte Fenster → erzeugt synthetische Overlaps # Beispiel ohne echte Fenster → erzeugt synthetische Overlaps
chunks = [ chunks = [
{"id": "demo#1", "text": "Alpha Beta Gamma"}, {"id": "demo#1", "text": "Alpha Beta Gamma"},
@ -287,4 +285,4 @@ if __name__ == "__main__": # pragma: no cover
from pprint import pprint from pprint import pprint
pprint(pls) pprint(pls)
recon = "".join(p["text"] for p in pls) recon = "".join(p["text"] for p in pls)
print("RECON:", recon) print("RECON:", recon)

View File

@ -1,8 +1,8 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Modul: app/core/note_payload.py # Modul: app/core/note_payload.py
# Version: 1.7.0 # Version: 1.7.1
# Datum: 2025-09-09 # Datum: 2025-11-08
from __future__ import annotations from __future__ import annotations
@ -193,6 +193,14 @@ def make_note_payload(
"references": refs, "references": refs,
} }
# ---> HINZUGEFÜGT: retriever_weight auf Note-Ebene numerisch, falls möglich
if fm.get("retriever_weight") is not None:
try:
payload["retriever_weight"] = float(fm.get("retriever_weight"))
except Exception:
# Falls kein Float (z. B. "high"), als Rohwert ablegen (kompatibel)
payload["retriever_weight"] = fm.get("retriever_weight")
for k in ("area", "project", "source", "lang", "slug", "aliases"): for k in ("area", "project", "source", "lang", "slug", "aliases"):
if k in fm: if k in fm:
payload[k] = fm[k] payload[k] = fm[k]