From 0feae50d90a16a6b7493eef515e401bb1fa949ea Mon Sep 17 00:00:00 2001 From: Lars Date: Sat, 8 Nov 2025 17:25:48 +0100 Subject: [PATCH] Dateien nach "scripts" hochladen --- scripts/import_markdown.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/scripts/import_markdown.py b/scripts/import_markdown.py index 4f44f46..660d561 100644 --- a/scripts/import_markdown.py +++ b/scripts/import_markdown.py @@ -351,12 +351,11 @@ def main() -> None: changed = args.force_replace or (not has_old) or hash_changed or text_changed do_baseline_only = (args.baseline_modes and has_old and needs_baseline and not changed) - - # -------- Chunks / Embeddings -------- - chunk_pls: List[Dict[str, Any]] = [] - try: - body_text = getattr(parsed, "body", "") or "" - # ---- Type-Registry integration ---- +# -------- Chunks / Embeddings -------- +chunk_pls: List[Dict[str, Any]] = [] +try: + body_text = getattr(parsed, "body", "") or "" + # ---- Type-Registry integration ---- try: note_type = resolve_note_type(fm.get("type"), reg) except Exception: @@ -371,22 +370,22 @@ def main() -> None: fm["chunk_profile"] = prof weight = cfg_type.get("retriever_weight") if weight is not None: - # note_pl exists later; we set it after creation as well fm["retriever_weight"] = float(weight) chunks = assemble_chunks(fm["id"], body_text, fm.get("type", "concept")) chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text) - except Exception as e: - print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {type(e).__name__}: {e}"})) - continue +except Exception as e: + print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {type(e).__name__}: {e}"})) + continue + +vecs: List[List[float]] = [[0.0] * cfg.dim for _ in chunk_pls] +if embed_texts and chunk_pls: + try: + texts_for_embed = [(pl.get("window") or pl.get("text") or "") for pl in chunk_pls] + vecs = embed_texts(texts_for_embed) + except Exception as e: + print(json.dumps({"path": path, "note_id": note_id, "warn": f"embed_texts failed, using zeros: {e}"})) - vecs: List[List[float]] = [[0.0] * cfg.dim for _ in chunk_pls] - if embed_texts and chunk_pls: - try: - texts_for_embed = [(pl.get("window") or pl.get("text") or "") for pl in chunk_pls] - vecs = embed_texts(texts_for_embed) - except Exception as e: - print(json.dumps({"path": path, "note_id": note_id, "warn": f"embed_texts failed, using zeros: {e}"})) # -------- Edges (robust) -------- edges: List[Dict[str, Any]] = []