Dateien nach "scripts" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s

This commit is contained in:
Lars 2025-11-16 18:38:06 +01:00
parent a73542a391
commit 0513673c2a

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
scripts/import_markdown.py (Mindnet V2 Importer, v2.5.0)
scripts/import_markdown.py (Mindnet V2 Importer, v2.5.1)
Zweck
-----
@@ -292,6 +292,8 @@ def main() -> None:
# Type-Registry laden
reg = load_type_registry()
# types_cfg: Direkter Zugriff auf den Knoten "types" (oder leeres Dict)
types_cfg = reg.get("types") if isinstance(reg, dict) else {}
root = os.path.abspath(args.vault)
@@ -401,7 +403,7 @@ def main() -> None:
if not note_pl.get("fulltext"):
note_pl["fulltext"] = getattr(parsed, "body", "") or ""
# retriever_weight **immer** in Note-Payload schreiben
# retriever_weight **immer** in Note-Payload schreiben (redundant aber robust)
try:
note_pl["retriever_weight"] = float(rw)
except Exception:
@@ -436,7 +438,28 @@ def main() -> None:
try:
body_text = getattr(parsed, "body", "") or ""
chunks = assemble_chunks(fm["id"], body_text, fm.get("type", "concept"))
chunk_pls: List[Dict[str, Any]] = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
# *** MINIMALFIX ***
# Übergib an make_chunk_payloads ein Note-Objekt mit verschachtelter 'frontmatter'
# und reiche die Registry ('types_cfg') durch, damit chunk_payload.py aus types.yaml lesen kann.
chunk_note = {
"frontmatter": fm,
"id": fm.get("id"),
"type": fm.get("type"),
"title": fm.get("title"),
"path": note_pl.get("path") or path,
# optional: direkte Felder, falls ein alter Builder darauf schaut
"note_id": note_pl.get("note_id"),
"tags": fm.get("tags"),
}
chunk_pls: List[Dict[str, Any]] = make_chunk_payloads(
chunk_note,
note_pl["path"],
chunks,
note_text=body_text,
types_cfg=(types_cfg if isinstance(types_cfg, dict) else {}),
file_path=path,
)
except Exception as e:
print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {type(e).__name__}: {e}"}))
continue
@@ -446,15 +469,15 @@ def main() -> None:
if "index" not in pl:
pl["index"] = i
pl["ord"] = int(pl.get("index", i)) + 1
# Entferne ggf. Alt-Aliase, um Duplikate zu vermeiden
for alias in ("chunk_num", "Chunk_Number"):
if alias in pl:
pl.pop(alias, None)
# retriever_weight **immer** auf Chunk-Payload spiegeln
# retriever_weight **immer** auf Chunk-Payload spiegeln (falls Builder es nicht gesetzt hat)
rwf = float(rw) if isinstance(rw, (int, float)) else 1.0
for pl in chunk_pls:
pl["retriever_weight"] = rwf
if "retriever_weight" not in pl:
pl["retriever_weight"] = rwf
# Embeddings (fallback: Nullvektoren)
vecs: List[List[float]] = [[0.0] * int(cfg.dim) for _ in chunk_pls]