Dateien nach "app/core" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
e451ea64ae
commit
52eae52061
|
|
@ -2,8 +2,8 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""
|
"""
|
||||||
Modul: app/core/chunk_payload.py
|
Modul: app/core/chunk_payload.py
|
||||||
Version: 2.2.0
|
Version: 2.2.1
|
||||||
Datum: 2025-10-06
|
Datum: 2025-11-08
|
||||||
|
|
||||||
Zweck
|
Zweck
|
||||||
-----
|
-----
|
||||||
|
|
@ -17,6 +17,7 @@ Felder (beibehalten aus 2.0.1):
|
||||||
- start, end (Offsets im gesamten Body)
|
- start, end (Offsets im gesamten Body)
|
||||||
- overlap_left, overlap_right
|
- overlap_left, overlap_right
|
||||||
- token_count?, section_title?, section_path?, type?, title?, tags?
|
- token_count?, section_title?, section_path?, type?, title?, tags?
|
||||||
|
- retriever_weight? (NEU: aus Frontmatter übernommen, numerisch gespeichert)
|
||||||
|
|
||||||
Kompatibilität:
|
Kompatibilität:
|
||||||
- 'id' == 'chunk_id' als Alias
|
- 'id' == 'chunk_id' als Alias
|
||||||
|
|
@ -27,18 +28,8 @@ Lizenz: MIT (projektintern)
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
def _overlap_from_frontmatter(frontmatter: Dict[str, Any], fallback: Tuple[int,int]) -> Tuple[int,int]:
|
|
||||||
prof = str(frontmatter.get("chunk_profile") or "").strip().lower()
|
|
||||||
if prof:
|
|
||||||
try:
|
|
||||||
return profile_overlap(prof)
|
|
||||||
except Exception:
|
|
||||||
return fallback
|
|
||||||
return fallback
|
|
||||||
|
|
||||||
|
|
||||||
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
||||||
|
|
||||||
from app.core.type_registry import profile_overlap
|
from app.core.type_registry import profile_overlap
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -232,6 +223,15 @@ def make_chunk_payloads(
|
||||||
|
|
||||||
# 5) Payload-Dicts
|
# 5) Payload-Dicts
|
||||||
payloads: List[Dict[str, Any]] = []
|
payloads: List[Dict[str, Any]] = []
|
||||||
|
# retriever_weight aus Frontmatter einlesen (einmalig auflösen)
|
||||||
|
_rw_val: Optional[float] = None
|
||||||
|
if isinstance(frontmatter, dict) and frontmatter.get("retriever_weight") is not None:
|
||||||
|
try:
|
||||||
|
_rw_val = float(frontmatter.get("retriever_weight"))
|
||||||
|
except Exception:
|
||||||
|
# Wenn keine Zahl, als None ignorieren (Qdrant-Index verlangt numerisch/Null)
|
||||||
|
_rw_val = None
|
||||||
|
|
||||||
for i, (win, seg) in enumerate(zip(windows_final, segments)):
|
for i, (win, seg) in enumerate(zip(windows_final, segments)):
|
||||||
chunk_id = ids_in[i] or f"{note_id}#{i+1}"
|
chunk_id = ids_in[i] or f"{note_id}#{i+1}"
|
||||||
pl: Dict[str, Any] = {
|
pl: Dict[str, Any] = {
|
||||||
|
|
@ -255,13 +255,6 @@ def make_chunk_payloads(
|
||||||
pl["title"] = note_title
|
pl["title"] = note_title
|
||||||
if note_tags is not None:
|
if note_tags is not None:
|
||||||
pl["tags"] = note_tags
|
pl["tags"] = note_tags
|
||||||
# propagate optional retriever_weight from frontmatter
|
|
||||||
try:
|
|
||||||
_rw = frontmatter.get("retriever_weight") if isinstance(frontmatter, dict) else None
|
|
||||||
if _rw is not None:
|
|
||||||
pl["retriever_weight"] = float(_rw)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
if token_counts[i] is not None:
|
if token_counts[i] is not None:
|
||||||
pl["token_count"] = int(token_counts[i])
|
pl["token_count"] = int(token_counts[i])
|
||||||
if section_titles[i] is not None:
|
if section_titles[i] is not None:
|
||||||
|
|
@ -269,6 +262,11 @@ def make_chunk_payloads(
|
||||||
if section_paths[i] is not None:
|
if section_paths[i] is not None:
|
||||||
sp = str(section_paths[i]).replace("\\", "/")
|
sp = str(section_paths[i]).replace("\\", "/")
|
||||||
pl["section_path"] = sp if sp else "/"
|
pl["section_path"] = sp if sp else "/"
|
||||||
|
|
||||||
|
# ---> HINZUGEFÜGT: retriever_weight pro Chunk aus Frontmatter numerisch mitschreiben
|
||||||
|
if _rw_val is not None:
|
||||||
|
pl["retriever_weight"] = _rw_val
|
||||||
|
|
||||||
payloads.append(pl)
|
payloads.append(pl)
|
||||||
|
|
||||||
return payloads
|
return payloads
|
||||||
|
|
@ -276,7 +274,7 @@ def make_chunk_payloads(
|
||||||
|
|
||||||
# __main__ Demo (optional)
|
# __main__ Demo (optional)
|
||||||
if __name__ == "__main__": # pragma: no cover
|
if __name__ == "__main__": # pragma: no cover
|
||||||
fm = {"id": "demo", "title": "Demo", "type": "concept"}
|
fm = {"id": "demo", "title": "Demo", "type": "concept", "retriever_weight": 0.85}
|
||||||
# Beispiel ohne echte Fenster → erzeugt synthetische Overlaps
|
# Beispiel ohne echte Fenster → erzeugt synthetische Overlaps
|
||||||
chunks = [
|
chunks = [
|
||||||
{"id": "demo#1", "text": "Alpha Beta Gamma"},
|
{"id": "demo#1", "text": "Alpha Beta Gamma"},
|
||||||
|
|
@ -287,4 +285,4 @@ if __name__ == "__main__": # pragma: no cover
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
pprint(pls)
|
pprint(pls)
|
||||||
recon = "".join(p["text"] for p in pls)
|
recon = "".join(p["text"] for p in pls)
|
||||||
print("RECON:", recon)
|
print("RECON:", recon)
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Modul: app/core/note_payload.py
|
# Modul: app/core/note_payload.py
|
||||||
# Version: 1.7.0
|
# Version: 1.7.1
|
||||||
# Datum: 2025-09-09
|
# Datum: 2025-11-08
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
@ -193,6 +193,14 @@ def make_note_payload(
|
||||||
"references": refs,
|
"references": refs,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ---> HINZUGEFÜGT: retriever_weight auf Note-Ebene – numerisch, falls möglich
|
||||||
|
if fm.get("retriever_weight") is not None:
|
||||||
|
try:
|
||||||
|
payload["retriever_weight"] = float(fm.get("retriever_weight"))
|
||||||
|
except Exception:
|
||||||
|
# Falls kein Float (z. B. "high"), als Rohwert ablegen (kompatibel)
|
||||||
|
payload["retriever_weight"] = fm.get("retriever_weight")
|
||||||
|
|
||||||
for k in ("area", "project", "source", "lang", "slug", "aliases"):
|
for k in ("area", "project", "source", "lang", "slug", "aliases"):
|
||||||
if k in fm:
|
if k in fm:
|
||||||
payload[k] = fm[k]
|
payload[k] = fm[k]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user