diff --git a/scripts/import_markdown.py b/scripts/import_markdown.py index 3b7a8eb..898c479 100644 --- a/scripts/import_markdown.py +++ b/scripts/import_markdown.py @@ -138,9 +138,10 @@ except Exception: return edges_col, pts def _upsert_batch(client, collection_name: str, points: list): - if not points: - return - client.upsert(collection_name=collection_name, points=points, wait=True) + if not points: + return + pts = _coerce_points_for_collection(client, collection_name, points) + client.upsert(collection_name=collection_name, points=pts, wait=True) # Type-Registry (optional) try: @@ -281,6 +282,55 @@ def _resolve_dim(cfg) -> int: # ---- Compatibility wrappers (no direct calls to project-specific signatures) ---- + +# ---- Qdrant vector schema detection & point coercion ---- +def _get_vector_schema(client, collection_name: str): + """ + Returns dict describing vector schema: + {"kind": "single", "size": } or {"kind": "named", "names": [..]}. + Falls back to single if detection fails. + """ + try: + info = client.get_collection(collection_name=collection_name) + vecs = getattr(info, "vectors", None) + if hasattr(vecs, "size") and isinstance(vecs.size, int): + return {"kind": "single", "size": vecs.size} + if hasattr(vecs, "config"): + # NamedVectors as dict-like in .config + names = list(getattr(vecs, "config", {}).keys()) + if names: + return {"kind": "named", "names": names} + except Exception: + pass + return {"kind": "single", "size": None} + +def _coerce_points_for_collection(client, collection_name: str, points: list): + """ + If collection uses named vectors, wrap each point's .vector into .vectors{: vector}. + Leaves points without vectors (e.g., edges) untouched. + """ + try: + schema = _get_vector_schema(client, collection_name) + if schema.get("kind") != "named": + return points + names = schema.get("names") or [] + if not names: + return points + primary = names[0] + from qdrant_client.http import models as _rest + fixed = [] + for pt in points: + # pt may be a dataclass; create a new PointStruct when needed + vec = getattr(pt, "vector", None) + if vec is not None: + fixed.append(_rest.PointStruct(id=pt.id, vectors={primary: vec}, payload=pt.payload)) + else: + # keep as-is (no vectors) + fixed.append(pt) + return fixed + except Exception: + return points + def _ensure_collections_compat(client, cfg, dim): """ Call ensure_collections with the correct signature across releases: