From 0513673c2a055b68878dfdd15bc15698c2c32ed7 Mon Sep 17 00:00:00 2001 From: Lars Date: Sun, 16 Nov 2025 18:38:06 +0100 Subject: [PATCH] Dateien nach "scripts" hochladen --- scripts/import_markdown.py | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/scripts/import_markdown.py b/scripts/import_markdown.py index 2f9a1fe..59e7c5a 100644 --- a/scripts/import_markdown.py +++ b/scripts/import_markdown.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -scripts/import_markdown.py (Mindnet V2 — Importer, v2.5.0) +scripts/import_markdown.py (Mindnet V2 — Importer, v2.5.1) Zweck ----- @@ -292,6 +292,8 @@ def main() -> None: # Type-Registry laden reg = load_type_registry() + # types_cfg: Direkter Zugriff auf den Knoten "types" (oder leeres Dict) + types_cfg = reg.get("types") if isinstance(reg, dict) else {} root = os.path.abspath(args.vault) @@ -401,7 +403,7 @@ def main() -> None: if not note_pl.get("fulltext"): note_pl["fulltext"] = getattr(parsed, "body", "") or "" - # retriever_weight **immer** in Note-Payload schreiben + # retriever_weight **immer** in Note-Payload schreiben (redundant aber robust) try: note_pl["retriever_weight"] = float(rw) except Exception: @@ -436,7 +438,28 @@ def main() -> None: try: body_text = getattr(parsed, "body", "") or "" chunks = assemble_chunks(fm["id"], body_text, fm.get("type", "concept")) - chunk_pls: List[Dict[str, Any]] = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text) + + # *** MINIMALFIX *** + # Übergib an make_chunk_payloads ein Note-Objekt mit verschachtelter 'frontmatter' + # und reiche die Registry ('types_cfg') durch, damit chunk_payload.py aus types.yaml lesen kann. + chunk_note = { + "frontmatter": fm, + "id": fm.get("id"), + "type": fm.get("type"), + "title": fm.get("title"), + "path": note_pl.get("path") or path, + # optional: direkte Felder, falls ein alter Builder darauf schaut + "note_id": note_pl.get("note_id"), + "tags": fm.get("tags"), + } + chunk_pls: List[Dict[str, Any]] = make_chunk_payloads( + chunk_note, + note_pl["path"], + chunks, + note_text=body_text, + types_cfg=(types_cfg if isinstance(types_cfg, dict) else {}), + file_path=path, + ) except Exception as e: print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {type(e).__name__}: {e}"})) continue @@ -446,15 +469,15 @@ def main() -> None: if "index" not in pl: pl["index"] = i pl["ord"] = int(pl.get("index", i)) + 1 - # Entferne ggf. Alt-Aliase, um Duplikate zu vermeiden for alias in ("chunk_num", "Chunk_Number"): if alias in pl: pl.pop(alias, None) - # retriever_weight **immer** auf Chunk-Payload spiegeln + # retriever_weight **immer** auf Chunk-Payload spiegeln (falls Builder es nicht gesetzt hat) rwf = float(rw) if isinstance(rw, (int, float)) else 1.0 for pl in chunk_pls: - pl["retriever_weight"] = rwf + if "retriever_weight" not in pl: + pl["retriever_weight"] = rwf # Embeddings (fallback: Nullvektoren) vecs: List[List[float]] = [[0.0] * int(cfg.dim) for _ in chunk_pls]