From 8bf370751126982d064a559c94310e5cf49c8250 Mon Sep 17 00:00:00 2001 From: Lars Date: Sat, 8 Nov 2025 18:06:33 +0100 Subject: [PATCH] Dateien nach "scripts" hochladen --- scripts/import_markdown.py | 82 ++++++++++++++------------------------ 1 file changed, 30 insertions(+), 52 deletions(-) diff --git a/scripts/import_markdown.py b/scripts/import_markdown.py index b9a4586..efb14a3 100644 --- a/scripts/import_markdown.py +++ b/scripts/import_markdown.py @@ -4,27 +4,6 @@ Script: scripts/import_markdown.py — Markdown → Qdrant (Notes, Chunks, Edges) Version: 3.7.2 Datum: 2025-09-30 -# ---- helpers ---- -def effective_chunk_profile(note_type: str, registry: dict) -> str | None: - try: - tcfg = (registry or {}).get("types", {}).get(note_type) or (registry or {}).get("types", {}).get("concept") - prof = (tcfg or {}).get("chunk_profile") - if isinstance(prof, str) and prof in {"short", "medium", "long"}: - return prof - except Exception: - pass - return None - -def effective_retriever_weight(note_type: str, registry: dict) -> float | None: - try: - tcfg = (registry or {}).get("types", {}).get(note_type) or (registry or {}).get("types", {}).get("concept") - w = (tcfg or {}).get("retriever_weight") - if w is None: - return None - return float(w) - except Exception: - return None - Kurzbeschreibung ---------------- @@ -372,39 +351,11 @@ def main() -> None: changed = args.force_replace or (not has_old) or hash_changed or text_changed do_baseline_only = (args.baseline_modes and has_old and needs_baseline and not changed) + # -------- Chunks / Embeddings -------- chunk_pls: List[Dict[str, Any]] = [] try: body_text = getattr(parsed, "body", "") or "" - # ---- Type-Registry integration ---- - try: - note_type = resolve_note_type(fm.get("type"), reg) - except Exception: - note_type = (fm.get("type") or "concept") - fm["type"] = note_type or "concept" - try: - cfg_type = get_type_config(note_type, reg) - except Exception: - cfg_type = {} - prof = effective_chunk_profile(note_type, reg) - if prof: - fm["chunk_profile"] = prof - weight = cfg_type.get("retriever_weight") - if weight is not None: - fm["retriever_weight"] = float(weight) - - # Ensure note-level payload reflects registry fields - try: - if isinstance(note_pl, dict): - if fm.get("type"): - note_pl["type"] = fm["type"] - if fm.get("chunk_profile"): - note_pl["chunk_profile"] = fm["chunk_profile"] - if "retriever_weight" in fm: - note_pl["retriever_weight"] = fm["retriever_weight"] - except Exception: - pass - chunks = assemble_chunks(fm["id"], body_text, fm.get("type", "concept")) chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text) except Exception as e: @@ -419,7 +370,6 @@ def main() -> None: except Exception as e: print(json.dumps({"path": path, "note_id": note_id, "warn": f"embed_texts failed, using zeros: {e}"})) - # -------- Edges (robust) -------- edges: List[Dict[str, Any]] = [] edges_failed = False @@ -497,4 +447,32 @@ def main() -> None: if __name__ == "__main__": - main() \ No newline at end of file + main() + + +# --- Type-Registry helper shims (safe if unused) --- + +def effective_chunk_profile(note_type: str, registry: dict) -> str | None: + try: + reg = registry or {} + types = reg.get("types", {}) if isinstance(reg, dict) else {} + # take exact type or fallback to concept + cfg = types.get(note_type) or types.get("concept") or {} + prof = cfg.get("chunk_profile") + if isinstance(prof, str) and prof in {"short", "medium", "long"}: + return prof + except Exception: + pass + return None + +def effective_retriever_weight(note_type: str, registry: dict) -> float | None: + try: + reg = registry or {} + types = reg.get("types", {}) if isinstance(reg, dict) else {} + cfg = types.get(note_type) or types.get("concept") or {} + w = cfg.get("retriever_weight") + if w is None: + return None + return float(w) + except Exception: + return None