Dateien nach "scripts" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
7ce3f982f2
commit
c465797654
|
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
scripts/import_markdown.py
|
||||
scripts/import_markdown.py (V2.3.1)
|
||||
|
||||
Zweck
|
||||
-----
|
||||
|
|
@ -11,6 +11,12 @@ Zweck
|
|||
- Integriert eine optionale Type-Registry (types.yaml), um z. B. chunk_profile
|
||||
und retriever_weight pro Notiz-Typ zu steuern.
|
||||
|
||||
Fix in V2.3.1
|
||||
-------------
|
||||
- `retriever_weight` wird nun **immer deterministisch** gesetzt:
|
||||
Frontmatter-Override > types.yaml > Default=1.0 (falls nichts konfiguriert).
|
||||
Das Feld wird **zwingend** in mindnet_notes **und** mindnet_chunks geschrieben.
|
||||
|
||||
Kompatibilität & Fixes
|
||||
----------------------
|
||||
- Unterstützt sowohl app.core.derive_edges (bevorzugt) als auch app.core.edges als Fallback
|
||||
|
|
@ -18,7 +24,6 @@ Kompatibilität & Fixes
|
|||
nicht zu TypeError führen.
|
||||
- `scroll_filter` wird für alle Scrolls verwendet (Qdrant >= 1.7.x).
|
||||
- `--purge-before-upsert` entfernt alte Chunks/Edges einer Note, wenn sich die Note geändert hat.
|
||||
- `retriever_weight` aus types.yaml bzw. Frontmatter wird in Note- und Chunk-Payload gespiegelt.
|
||||
- Baseline-Hash-Strategie: hash_mode (body|frontmatter|full), hash_source (parsed|raw), hash_normalize (canonical|none).
|
||||
|
||||
Aufrufbeispiele
|
||||
|
|
@ -113,7 +118,7 @@ def effective_chunk_profile(note_type: str, reg: dict) -> Optional[str]:
|
|||
return prof
|
||||
return None
|
||||
|
||||
def effective_retriever_weight(note_type: str, reg: dict) -> Optional[float]:
|
||||
def effective_retriever_weight_from_registry(note_type: str, reg: dict) -> Optional[float]:
|
||||
cfg = get_type_config(note_type, reg)
|
||||
w = cfg.get("retriever_weight")
|
||||
try:
|
||||
|
|
@ -121,6 +126,25 @@ def effective_retriever_weight(note_type: str, reg: dict) -> Optional[float]:
|
|||
except Exception:
|
||||
return None
|
||||
|
||||
def compute_effective_retriever_weight(fm: Dict[str, Any], note_type: str, reg: dict) -> float:
    """Determine the final ``retriever_weight`` for a note.

    Resolution order (first usable value wins):

    1. Frontmatter override (``fm["retriever_weight"]``)
    2. Type registry entry for ``note_type`` (via
       ``effective_retriever_weight_from_registry``)
    3. Default ``1.0``

    Args:
        fm: Parsed frontmatter of the note.
        note_type: Note type used to look up the registry entry.
        reg: Type registry passed through to the registry lookup.

    Returns:
        The resolved weight as a float. A frontmatter override that cannot
        be converted to a finite float is ignored and resolution continues
        with the registry.
    """
    # Local import keeps this block self-contained; the module's import
    # header is not touched.
    import math

    # 1) Frontmatter override
    override = fm.get("retriever_weight")
    if override is not None:
        try:
            value = float(override)
        except (TypeError, ValueError, OverflowError):
            # Not numeric (e.g. "high", a list, an oversized int): treat the
            # override as absent instead of aborting the import.
            value = None
        # NaN/inf are not meaningful weights and are not valid JSON numbers,
        # so they are treated like any other invalid override.
        if value is not None and math.isfinite(value):
            return value

    # 2) Registry
    registry_weight = effective_retriever_weight_from_registry(note_type, reg)
    if registry_weight is not None:
        return float(registry_weight)

    # 3) Default
    return 1.0
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Sonstige Helper
|
||||
# ------------------------------------------------------------
|
||||
|
|
@ -345,12 +369,9 @@ def main() -> None:
|
|||
if prof:
|
||||
fm["chunk_profile"] = prof
|
||||
|
||||
weight = effective_retriever_weight(note_type, reg)
|
||||
if weight is not None:
|
||||
try:
|
||||
fm["retriever_weight"] = float(weight)
|
||||
except Exception:
|
||||
pass
|
||||
# NEU: finalen retriever_weight deterministisch bestimmen
|
||||
rw = compute_effective_retriever_weight(fm, note_type, reg)
|
||||
fm["retriever_weight"] = rw # Frontmatter spiegeln, damit nachfolgende Builder konsistent sind
|
||||
|
||||
# --- Payload aufbauen (inkl. Hashes) ---
|
||||
try:
|
||||
|
|
@ -369,12 +390,11 @@ def main() -> None:
|
|||
if not note_pl.get("fulltext"):
|
||||
note_pl["fulltext"] = getattr(parsed, "body", "") or ""
|
||||
|
||||
# retriever_weight sicher in Note-Payload spiegeln
|
||||
if "retriever_weight" not in note_pl and fm.get("retriever_weight") is not None:
|
||||
try:
|
||||
note_pl["retriever_weight"] = float(fm.get("retriever_weight"))
|
||||
except Exception:
|
||||
pass
|
||||
# NEU: retriever_weight **immer** in Note-Payload setzen
|
||||
try:
|
||||
note_pl["retriever_weight"] = float(rw)
|
||||
except Exception:
|
||||
note_pl["retriever_weight"] = 1.0
|
||||
|
||||
note_id = note_pl.get("note_id") or fm.get("id")
|
||||
if not note_id:
|
||||
|
|
@ -410,15 +430,13 @@ def main() -> None:
|
|||
print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {type(e).__name__}: {e}"}))
|
||||
continue
|
||||
|
||||
# retriever_weight auf Chunk-Payload spiegeln
|
||||
if fm.get("retriever_weight") is not None:
|
||||
try:
|
||||
rw = float(fm.get("retriever_weight"))
|
||||
for pl in chunk_pls:
|
||||
if "retriever_weight" not in pl:
|
||||
pl["retriever_weight"] = rw
|
||||
except Exception:
|
||||
pass
|
||||
# NEU: retriever_weight **immer** auf Chunk-Payload spiegeln
|
||||
try:
|
||||
rwf = float(rw)
|
||||
except Exception:
|
||||
rwf = 1.0
|
||||
for pl in chunk_pls:
|
||||
pl["retriever_weight"] = rwf
|
||||
|
||||
# Embeddings (fallback: Nullvektoren)
|
||||
vecs: List[List[float]] = [[0.0] * int(cfg.dim) for _ in chunk_pls]
|
||||
|
|
@ -450,6 +468,8 @@ def main() -> None:
|
|||
summary = {
|
||||
"note_id": note_id,
|
||||
"title": fm.get("title"),
|
||||
"type": fm.get("type"),
|
||||
"rw": rw,
|
||||
"chunks": len(chunk_pls),
|
||||
"edges": len(edges),
|
||||
"edges_failed": edges_failed,
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user