Dateien nach "scripts" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s

This commit is contained in:
Lars 2025-11-16 18:07:56 +01:00
parent 7ce3f982f2
commit c465797654

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
scripts/import_markdown.py
scripts/import_markdown.py (V2.3.1)
Zweck
-----
@ -11,6 +11,12 @@ Zweck
- Integriert eine optionale Type-Registry (types.yaml), um z. B. chunk_profile
und retriever_weight pro Notiz-Typ zu steuern.
Fix in V2.3.1
-------------
- `retriever_weight` wird nun **immer deterministisch** gesetzt:
Frontmatter-Override > types.yaml > Default=1.0 (falls nichts konfiguriert).
Das Feld wird **zwingend** in mindnet_notes **und** mindnet_chunks geschrieben.
Kompatibilität & Fixes
----------------------
- Unterstützt sowohl app.core.derive_edges (bevorzugt) als auch app.core.edges als Fallback
@ -18,7 +24,6 @@ Kompatibilität & Fixes
nicht zu TypeError führen.
- `scroll_filter` wird für alle Scrolls verwendet (Qdrant >= 1.7.x).
- `--purge-before-upsert` entfernt alte Chunks/Edges einer Note, wenn sich die Note geändert hat.
- `retriever_weight` aus types.yaml bzw. Frontmatter wird in Note- und Chunk-Payload gespiegelt.
- Baseline-Hash-Strategie: hash_mode (body|frontmatter|full), hash_source (parsed|raw), hash_normalize (canonical|none).
Aufrufbeispiele
@ -113,7 +118,7 @@ def effective_chunk_profile(note_type: str, reg: dict) -> Optional[str]:
return prof
return None
def effective_retriever_weight(note_type: str, reg: dict) -> Optional[float]:
def effective_retriever_weight_from_registry(note_type: str, reg: dict) -> Optional[float]:
cfg = get_type_config(note_type, reg)
w = cfg.get("retriever_weight")
try:
@ -121,6 +126,25 @@ def effective_retriever_weight(note_type: str, reg: dict) -> Optional[float]:
except Exception:
return None
def compute_effective_retriever_weight(fm: Dict[str, Any], note_type: str, reg: dict) -> float:
    """Resolve the final ``retriever_weight`` for a note.

    Precedence (first usable value wins):
      1) frontmatter override (``fm["retriever_weight"]``)
      2) type registry entry for ``note_type`` (types.yaml)
      3) default ``1.0``

    A candidate counts as usable only if it converts to a *finite* float.
    Values such as ``"abc"``, ``nan`` or ``inf`` are skipped so that the
    next source in the precedence chain is consulted instead of storing a
    weight that cannot be ranked or serialised as valid JSON.

    Args:
        fm: Parsed frontmatter of the note; may be empty or ``None``.
        note_type: Type of the note, used for the registry lookup.
        reg: Loaded type registry.

    Returns:
        The effective retriever weight as a finite float.
    """
    # Local import: the file's import block is not part of this change.
    import math

    def _finite_or_none(value: Any) -> Optional[float]:
        """Convert *value* to a finite float, or return None if impossible."""
        if value is None:
            return None
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            # Not numeric (or too large for a float): treat as "not configured".
            return None
        return number if math.isfinite(number) else None

    # 1) Frontmatter override
    override = _finite_or_none((fm or {}).get("retriever_weight"))
    if override is not None:
        return override

    # 2) Registry (types.yaml)
    from_registry = _finite_or_none(
        effective_retriever_weight_from_registry(note_type, reg)
    )
    if from_registry is not None:
        return from_registry

    # 3) Default
    return 1.0
# ------------------------------------------------------------
# Sonstige Helper
# ------------------------------------------------------------
@ -345,12 +369,9 @@ def main() -> None:
if prof:
fm["chunk_profile"] = prof
weight = effective_retriever_weight(note_type, reg)
if weight is not None:
try:
fm["retriever_weight"] = float(weight)
except Exception:
pass
# NEU: finalen retriever_weight deterministisch bestimmen
rw = compute_effective_retriever_weight(fm, note_type, reg)
fm["retriever_weight"] = rw # Frontmatter spiegeln, damit nachfolgende Builder konsistent sind
# --- Payload aufbauen (inkl. Hashes) ---
try:
@ -369,12 +390,11 @@ def main() -> None:
if not note_pl.get("fulltext"):
note_pl["fulltext"] = getattr(parsed, "body", "") or ""
# retriever_weight sicher in Note-Payload spiegeln
if "retriever_weight" not in note_pl and fm.get("retriever_weight") is not None:
try:
note_pl["retriever_weight"] = float(fm.get("retriever_weight"))
except Exception:
pass
# NEU: retriever_weight **immer** in Note-Payload setzen
try:
note_pl["retriever_weight"] = float(rw)
except Exception:
note_pl["retriever_weight"] = 1.0
note_id = note_pl.get("note_id") or fm.get("id")
if not note_id:
@ -410,15 +430,13 @@ def main() -> None:
print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {type(e).__name__}: {e}"}))
continue
# retriever_weight auf Chunk-Payload spiegeln
if fm.get("retriever_weight") is not None:
try:
rw = float(fm.get("retriever_weight"))
for pl in chunk_pls:
if "retriever_weight" not in pl:
pl["retriever_weight"] = rw
except Exception:
pass
# NEU: retriever_weight **immer** auf Chunk-Payload spiegeln
try:
rwf = float(rw)
except Exception:
rwf = 1.0
for pl in chunk_pls:
pl["retriever_weight"] = rwf
# Embeddings (fallback: Nullvektoren)
vecs: List[List[float]] = [[0.0] * int(cfg.dim) for _ in chunk_pls]
@ -450,6 +468,8 @@ def main() -> None:
summary = {
"note_id": note_id,
"title": fm.get("title"),
"type": fm.get("type"),
"rw": rw,
"chunks": len(chunk_pls),
"edges": len(edges),
"edges_failed": edges_failed,