Dateien nach "app/core" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
ead1b8c1bc
commit
7b56f696d6
|
|
@@ -2,19 +2,28 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Modul: app/core/chunk_payload.py
|
||||
Version: 2.3.0
|
||||
Datum: 2025-11-08
|
||||
Version: 2.2.0
|
||||
Datum: 2025-10-06
|
||||
|
||||
Änderungen ggü. 2.2.0
|
||||
----------------------
|
||||
- Optionaler Parameter `chunk_profile: str | None` (abwärtskompatibel).
|
||||
- Bei fehlenden *echten* Fenstern (kein Overlap geliefert) wird das synthetische
|
||||
Overlap anhand des Chunk-Profils (short|medium|long) gewählt. Ohne Profil
|
||||
bleibt das bisherige Verhalten bestehen (Übernahme aus get_sizes(note_type)).
|
||||
Zweck
|
||||
-----
|
||||
Erzeugt Qdrant-Payloads für Chunks. Voll abwärtskompatibel zu v2.0.1.
|
||||
Neu: Wenn der Chunker KEIN Overlap im Fenster liefert (== window fehlt / identisch zur Kernpassage),
|
||||
erzeugen wir FENSTER mit synthetischem Overlap auf Basis chunk_config.get_sizes(note_type)['overlap'].
|
||||
|
||||
Hinweis
|
||||
------
|
||||
IDs, Felder und Vektoren bleiben unverändert.
|
||||
Felder (beibehalten aus 2.0.1):
|
||||
- note_id, chunk_id, id (Alias), chunk_index, seq, path
|
||||
- window (mit Overlap), text (ohne linkes Overlap)
|
||||
- start, end (Offsets im gesamten Body)
|
||||
- overlap_left, overlap_right
|
||||
- token_count?, section_title?, section_path?, type?, title?, tags?
|
||||
|
||||
Kompatibilität:
|
||||
- 'id' == 'chunk_id' als Alias
|
||||
- Pfade bleiben relativ (keine führenden '/'), Backslashes → Slashes
|
||||
- Robust für Chunk-Objekte oder Dicts; Fensterquelle: 'window'|'text'|'content'|'raw'
|
||||
|
||||
Lizenz: MIT (projektintern)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
@@ -28,13 +37,6 @@ except Exception:
|
|||
# konservativer Default, falls Import fehlschlägt
|
||||
return {"overlap": (40, 60), "target": (250, 350), "max": 500}
|
||||
|
||||
# NEU: optionaler Import – Overlap-Empfehlungen aus der Type-Registry
|
||||
try:
|
||||
from app.core.type_registry import profile_overlap as _profile_overlap
|
||||
except Exception:
|
||||
def _profile_overlap(_profile: Optional[str]) -> tuple[int,int]:
|
||||
return (40, 60)
|
||||
|
||||
|
||||
# ------------------------------- Utils ------------------------------- #
|
||||
|
||||
|
|
@@ -110,8 +112,6 @@ def make_chunk_payloads(
|
|||
rel_path: str,
|
||||
chunks: Iterable[Union[Dict[str, Any], Any]],
|
||||
note_text: Optional[str] = None,
|
||||
*, # neue, nur-keyword Parameter bleiben abwärtskompatibel
|
||||
chunk_profile: Optional[str] = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Baut Payloads pro Chunk. Falls Fenster ohne Overlap geliefert werden,
|
||||
|
|
@@ -158,28 +158,15 @@ def make_chunk_payloads(
|
|||
windows_final = raw_windows[:] # bereits mit Overlap geliefert
|
||||
else:
|
||||
# Keine echten Fenster → Segmente sind identisch zu "Fenstern" (bisher),
|
||||
# wir erzeugen synthetische Fenster mit Overlap.
|
||||
# wir erzeugen synthetische Fenster mit Overlap gemäß Typ
|
||||
segments = [w or "" for w in raw_windows]
|
||||
overlaps_left = []
|
||||
windows_final = []
|
||||
recon = ""
|
||||
|
||||
try:
|
||||
# Bisheriges Verhalten: aus get_sizes(note_type)
|
||||
overlap_low, overlap_high = tuple(_get_sizes(note_type).get("overlap", (40, 60)))
|
||||
except Exception:
|
||||
overlap_low, overlap_high = (40, 60)
|
||||
|
||||
# Registry-Profil (falls vorhanden) übersteuert *nur* den Overlap defensiv
|
||||
if isinstance(chunk_profile, str) and chunk_profile.strip():
|
||||
try:
|
||||
o_low, o_high = _profile_overlap(chunk_profile.strip().lower())
|
||||
# defensiver Clamp: niemals größer als 3x Default
|
||||
overlap_low = max(0, min(o_low, overlap_low * 3))
|
||||
overlap_high = max(overlap_low, min(o_high, overlap_high * 3))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
overlap_target = int(overlap_low)
|
||||
|
||||
for i, seg in enumerate(segments):
|
||||
|
|
@@ -268,6 +255,7 @@ def make_chunk_payloads(
|
|||
return payloads
|
||||
|
||||
|
||||
# __main__ Demo (optional)
|
||||
if __name__ == "__main__": # pragma: no cover
|
||||
fm = {"id": "demo", "title": "Demo", "type": "concept"}
|
||||
# Beispiel ohne echte Fenster → erzeugt synthetische Overlaps
|
||||
|
|
@@ -276,7 +264,7 @@ if __name__ == "__main__": # pragma: no cover
|
|||
{"id": "demo#2", "text": "Gamma Delta"},
|
||||
{"id": "demo#3", "text": "Delta Epsilon Zeta"},
|
||||
]
|
||||
pls = make_chunk_payloads(fm, "path/demo.md", chunks, note_text="Alpha Beta Gamma Delta Epsilon Zeta", chunk_profile="long")
|
||||
pls = make_chunk_payloads(fm, "path/demo.md", chunks, note_text="Alpha Beta Gamma Delta Epsilon Zeta")
|
||||
from pprint import pprint
|
||||
pprint(pls)
|
||||
recon = "".join(p["text"] for p in pls)
|
||||
|
|
|
|||
|
|
@@ -2,17 +2,32 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Modul: app/core/derive_edges.py
|
||||
Version: 1.5.0
|
||||
Datum: 2025-11-08
|
||||
Version: 1.4.0
|
||||
Datum: 2025-10-01
|
||||
|
||||
Änderung
|
||||
--------
|
||||
- Integration der Type-Registry (optional): Ist im Typ die Default-Kante
|
||||
"references" enthalten, werden Note-Scope-References/Backlinks **additiv**
|
||||
aktiviert – auch wenn `include_note_scope_refs=False` übergeben wurde.
|
||||
(Keine Breaking Changes: bestehende Parameter bleiben erhalten.)
|
||||
Zweck
|
||||
-----
|
||||
Robuste Kantenbildung für mindnet (Notes/Chunks):
|
||||
- belongs_to (chunk -> note)
|
||||
- next / prev (chunk-Kette)
|
||||
- references (chunk-scope) aus Chunk.window/text
|
||||
- optional references/backlink (note-scope)
|
||||
|
||||
Weitere Logik (belongs_to/prev/next & chunk-scope references) bleibt unverändert.
|
||||
Wichtig: Wikilinks werden mit der Parser-Funktion `extract_wikilinks` extrahiert,
|
||||
damit Varianten wie [[id#anchor]] oder [[id|label]] korrekt auf 'id' reduziert werden.
|
||||
|
||||
Erwartete Chunk-Payload-Felder:
|
||||
{
|
||||
"note_id": "...",
|
||||
"chunk_id": "...", # Alias "id" ist zulässig
|
||||
"id": "...",
|
||||
"chunk_index": int,
|
||||
"seq": int,
|
||||
"window": str,
|
||||
"text": str,
|
||||
"path": "rel/path.md",
|
||||
...
|
||||
}
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
@@ -21,16 +36,6 @@ from typing import Dict, List, Optional, Iterable
|
|||
# WICHTIG: benutze die Parser-Extraktion für saubere Wikilinks
|
||||
from app.core.parser import extract_wikilinks
|
||||
|
||||
# optional: Type-Registry (Fallback: deaktiviert)
|
||||
try:
|
||||
from app.core.type_registry import load_type_registry, get_type_config
|
||||
_REG = load_type_registry() # prozessweiter Cache
|
||||
except Exception: # pragma: no cover
|
||||
_REG = {"types": {"concept": {"edge_defaults": ["references"]}}}
|
||||
def get_type_config(_t, _r): # type: ignore
|
||||
return {"edge_defaults": ["references"]}
|
||||
|
||||
|
||||
def _get(d: dict, *keys, default=None):
|
||||
for k in keys:
|
||||
if k in d and d[k] is not None:
|
||||
|
|
@@ -81,25 +86,9 @@ def build_edges_for_note(
|
|||
- next / prev: zwischen aufeinanderfolgenden Chunks
|
||||
- references: pro Chunk aus window/text
|
||||
- optional note-scope references/backlinks: dedupliziert über alle Chunk-Funde + note_level_references
|
||||
|
||||
Type-Registry-Erweiterung (additiv):
|
||||
- Wenn der *Note-Typ* 'references' in seinen edge_defaults hat, werden
|
||||
note-scope references/backlinks zusätzlich aktiviert.
|
||||
"""
|
||||
edges: List[dict] = []
|
||||
|
||||
# Typ aus Chunk-Payloads ableiten (falls vorhanden)
|
||||
note_type = None
|
||||
for ch in chunks:
|
||||
nt = ch.get("type")
|
||||
if isinstance(nt, str) and nt.strip():
|
||||
note_type = nt.strip().lower()
|
||||
break
|
||||
type_cfg = get_type_config(note_type, _REG)
|
||||
edge_defaults = [e for e in (type_cfg.get("edge_defaults") or []) if isinstance(e, str)]
|
||||
|
||||
want_note_scope_refs = bool(include_note_scope_refs) or ("references" in edge_defaults)
|
||||
|
||||
# belongs_to
|
||||
for ch in chunks:
|
||||
cid = _get(ch, "chunk_id", "id")
|
||||
|
|
@@ -130,7 +119,7 @@ def build_edges_for_note(
|
|||
refs_all.extend(refs)
|
||||
|
||||
# optional: note-scope references/backlinks
|
||||
if want_note_scope_refs:
|
||||
if include_note_scope_refs:
|
||||
refs_note = refs_all[:]
|
||||
if note_level_references:
|
||||
refs_note.extend([r for r in note_level_references if isinstance(r, str) and r])
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user