diff --git a/scripts/import_markdown.py b/scripts/import_markdown.py
index 88876f5..2df85c8 100644
--- a/scripts/import_markdown.py
+++ b/scripts/import_markdown.py
@@ -219,6 +219,26 @@ def _resolve_mode(val: Optional[str]) -> str:
 
 
+def _resolve_dim(cfg) -> int:
+    # Try common attribute names on QdrantConfig
+    for attr in ("dim", "vector_dim", "dimension", "dimensions", "embedding_dim", "embed_dim", "vector_size", "size"):
+        try:
+            v = getattr(cfg, attr)
+            if isinstance(v, int) and v > 0:
+                return v
+        except Exception:
+            pass
+    # Try environment fallbacks
+    for key in ("MINDNET_DIM", "EMBED_DIM", "EMBEDDING_DIM", "QDRANT_VECTOR_DIM", "QDRANT_DIM", "VECTOR_DIM", "DIM"):
+        try:
+            v = int(os.environ.get(key, "").strip() or "0")
+            if v > 0:
+                return v
+        except Exception:
+            continue
+    # Conservative default: MiniLM 384d (commonly used in this project)
+    return 384
+
 def _env(key: str, default: str) -> str:
     return (os.environ.get(key) or default).strip().lower()
 
 
@@ -267,7 +287,15 @@ def main() -> None:
     if args.prefix:
         cfg.prefix = args.prefix.strip()
     client = get_client(cfg)
-    ensure_collections(client, cfg.prefix, cfg.dim)
+    dim = _resolve_dim(cfg)
+    # ensure_collections signature compatibility
+    try:
+        ensure_collections(client, cfg.prefix, dim)
+    except TypeError:
+        try:
+            ensure_collections(client, cfg.prefix)
+        except TypeError:
+            ensure_collections(client)
 
     # abwärtskompatible Index-Erstellung
     _ensure_payload_indexes(client, cfg.prefix)
@@ -408,7 +436,7 @@ def main() -> None:
             print(json.dumps({"path": path, "note_id": note_id, "error": f"chunk build failed: {type(e).__name__}: {e}"}))
             continue
 
-        vecs: List[List[float]] = [[0.0] * cfg.dim for _ in chunk_pls]
+        vecs: List[List[float]] = [[0.0] * dim for _ in chunk_pls]
         if embed_texts and chunk_pls:
             try:
                 texts_for_embed = [(pl.get("window") or pl.get("text") or "") for pl in chunk_pls]
@@ -473,8 +501,8 @@ def main() -> None:
                 note_pl["hash_fulltext"] = old_payload.get("hash_fulltext", note_pl.get("hash_fulltext"))
                 note_pl["hash_signature"] = old_payload.get("hash_signature", note_pl.get("hash_signature"))
                 note_pl["hashes"] = merged_hashes
-                notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, cfg.dim)
-                upsert_batch(client, notes_name, note_pts)
+                notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, dim)
+                _upsert_batch(client, notes_name, note_pts)
                 continue
 
             if not changed:
@@ -486,14 +514,14 @@ def main() -> None:
         except Exception as e:
             print(json.dumps({"path": path, "note_id": note_id, "warn": f"purge failed: {e}"}))
 
-        notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, cfg.dim)
-        upsert_batch(client, notes_name, note_pts)
+        notes_name, note_pts = points_for_note(cfg.prefix, note_pl, None, dim)
+        _upsert_batch(client, notes_name, note_pts)
         if chunk_pls:
-            chunks_name, chunk_pts = points_for_chunks(cfg.prefix, chunk_pls, vecs)
-            upsert_batch(client, chunks_name, chunk_pts)
+            chunks_name, chunk_pts = _points_for_chunks(cfg.prefix, chunk_pls, vecs)
+            _upsert_batch(client, chunks_name, chunk_pts)
         if edges:
-            edges_name, edge_pts = points_for_edges(cfg.prefix, edges)
-            upsert_batch(client, edges_name, edge_pts)
+            edges_name, edge_pts = _points_for_edges(cfg.prefix, edges)
+            _upsert_batch(client, edges_name, edge_pts)
 
 
     print(f"Done. Processed notes: {processed}")