From f6b7652219fdcfe5ee4fff798f357886abe83be3 Mon Sep 17 00:00:00 2001 From: Lars Date: Sat, 8 Nov 2025 17:29:25 +0100 Subject: [PATCH] Dateien nach "scripts" hochladen --- scripts/import_markdown.py | 66 +++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/scripts/import_markdown.py b/scripts/import_markdown.py index 660d561..335dc1f 100644 --- a/scripts/import_markdown.py +++ b/scripts/import_markdown.py @@ -351,40 +351,40 @@ def main() -> None: changed = args.force_replace or (not has_old) or hash_changed or text_changed do_baseline_only = (args.baseline_modes and has_old and needs_baseline and not changed) -# -------- Chunks / Embeddings -------- -chunk_pls: List[Dict[str, Any]] = [] -try: - body_text = getattr(parsed, "body", "") or "" - # ---- Type-Registry integration ---- - try: - note_type = resolve_note_type(fm.get("type"), reg) - except Exception: - note_type = (fm.get("type") or "concept") - fm["type"] = note_type or "concept" - try: - cfg_type = get_type_config(note_type, reg) - except Exception: - cfg_type = {} - prof = effective_chunk_profile(note_type, reg) - if prof: - fm["chunk_profile"] = prof - weight = cfg_type.get("retriever_weight") - if weight is not None: - fm["retriever_weight"] = float(weight) + # -------- Chunks / Embeddings -------- + chunk_pls: List[Dict[str, Any]] = [] + try: + body_text = getattr(parsed, "body", "") or "" + # ---- Type-Registry integration ---- + try: + note_type = resolve_note_type(fm.get("type"), reg) + except Exception: + note_type = (fm.get("type") or "concept") + fm["type"] = note_type or "concept" + try: + cfg_type = get_type_config(note_type, reg) + except Exception: + cfg_type = {} + prof = effective_chunk_profile(note_type, reg) + if prof: + fm["chunk_profile"] = prof + weight = cfg_type.get("retriever_weight") + if weight is not None: + fm["retriever_weight"] = float(weight) - chunks = assemble_chunks(fm["id"], body_text, fm.get("type", "concept")) - chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text) -except Exception as e: - print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {type(e).__name__}: {e}"})) - continue + chunks = assemble_chunks(fm["id"], body_text, fm.get("type", "concept")) + chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text) + except Exception as e: + print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {type(e).__name__}: {e}"})) + continue -vecs: List[List[float]] = [[0.0] * cfg.dim for _ in chunk_pls] -if embed_texts and chunk_pls: - try: - texts_for_embed = [(pl.get("window") or pl.get("text") or "") for pl in chunk_pls] - vecs = embed_texts(texts_for_embed) - except Exception as e: - print(json.dumps({"path": path, "note_id": note_id, "warn": f"embed_texts failed, using zeros: {e}"})) + vecs: List[List[float]] = [[0.0] * cfg.dim for _ in chunk_pls] + if embed_texts and chunk_pls: + try: + texts_for_embed = [(pl.get("window") or pl.get("text") or "") for pl in chunk_pls] + vecs = embed_texts(texts_for_embed) + except Exception as e: + print(json.dumps({"path": path, "note_id": note_id, "warn": f"embed_texts failed, using zeros: {e}"})) # -------- Edges (robust) -------- @@ -464,4 +464,4 @@ if embed_texts and chunk_pls: if __name__ == "__main__": - main() + main() \ No newline at end of file