Dateien nach "scripts" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
This commit is contained in:
parent
a73542a391
commit
0513673c2a
|
|
@@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
scripts/import_markdown.py (Mindnet V2 — Importer, v2.5.0)
|
||||
scripts/import_markdown.py (Mindnet V2 — Importer, v2.5.1)
|
||||
|
||||
Zweck
|
||||
-----
|
||||
|
|
@@ -292,6 +292,8 @@ def main() -> None:
|
|||
|
||||
# Type-Registry laden
|
||||
reg = load_type_registry()
|
||||
# types_cfg: Direkter Zugriff auf den Knoten "types" (oder leeres Dict)
|
||||
types_cfg = reg.get("types") if isinstance(reg, dict) else {}
|
||||
|
||||
root = os.path.abspath(args.vault)
|
||||
|
||||
|
|
@@ -401,7 +403,7 @@ def main() -> None:
|
|||
if not note_pl.get("fulltext"):
|
||||
note_pl["fulltext"] = getattr(parsed, "body", "") or ""
|
||||
|
||||
# retriever_weight **immer** in Note-Payload schreiben
|
||||
# retriever_weight **immer** in Note-Payload schreiben (redundant aber robust)
|
||||
try:
|
||||
note_pl["retriever_weight"] = float(rw)
|
||||
except Exception:
|
||||
|
|
@@ -436,7 +438,28 @@ def main() -> None:
|
|||
try:
|
||||
body_text = getattr(parsed, "body", "") or ""
|
||||
chunks = assemble_chunks(fm["id"], body_text, fm.get("type", "concept"))
|
||||
chunk_pls: List[Dict[str, Any]] = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
|
||||
|
||||
# *** MINIMALFIX ***
|
||||
# Übergib an make_chunk_payloads ein Note-Objekt mit verschachtelter 'frontmatter'
|
||||
# und reiche die Registry ('types_cfg') durch, damit chunk_payload.py aus types.yaml lesen kann.
|
||||
chunk_note = {
|
||||
"frontmatter": fm,
|
||||
"id": fm.get("id"),
|
||||
"type": fm.get("type"),
|
||||
"title": fm.get("title"),
|
||||
"path": note_pl.get("path") or path,
|
||||
# optional: direkte Felder, falls ein alter Builder darauf schaut
|
||||
"note_id": note_pl.get("note_id"),
|
||||
"tags": fm.get("tags"),
|
||||
}
|
||||
chunk_pls: List[Dict[str, Any]] = make_chunk_payloads(
|
||||
chunk_note,
|
||||
note_pl["path"],
|
||||
chunks,
|
||||
note_text=body_text,
|
||||
types_cfg=(types_cfg if isinstance(types_cfg, dict) else {}),
|
||||
file_path=path,
|
||||
)
|
||||
except Exception as e:
|
||||
print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {type(e).__name__}: {e}"}))
|
||||
continue
|
||||
|
|
@@ -446,15 +469,15 @@ def main() -> None:
|
|||
if "index" not in pl:
|
||||
pl["index"] = i
|
||||
pl["ord"] = int(pl.get("index", i)) + 1
|
||||
# Entferne ggf. Alt-Aliase, um Duplikate zu vermeiden
|
||||
for alias in ("chunk_num", "Chunk_Number"):
|
||||
if alias in pl:
|
||||
pl.pop(alias, None)
|
||||
|
||||
# retriever_weight **immer** auf Chunk-Payload spiegeln
|
||||
# retriever_weight **immer** auf Chunk-Payload spiegeln (falls Builder es nicht gesetzt hat)
|
||||
rwf = float(rw) if isinstance(rw, (int, float)) else 1.0
|
||||
for pl in chunk_pls:
|
||||
pl["retriever_weight"] = rwf
|
||||
if "retriever_weight" not in pl:
|
||||
pl["retriever_weight"] = rwf
|
||||
|
||||
# Embeddings (fallback: Nullvektoren)
|
||||
vecs: List[List[float]] = [[0.0] * int(cfg.dim) for _ in chunk_pls]
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user