diff --git a/scripts/import_markdown.py b/scripts/import_markdown.py
index 2ace4d1..688ac4d 100644
--- a/scripts/import_markdown.py
+++ b/scripts/import_markdown.py
@@ -294,32 +294,71 @@ def _resolve_dim(cfg) -> int:
 
 # ---- Qdrant vector schema detection & point coercion ----
 
+
 def _get_vector_schema(client, collection_name: str):
     """
     Returns dict describing vector schema:
       {"kind": "single", "size": <int>} or
-      {"kind": "named", "names": [..]}.
+      {"kind": "named", "names": [..], "primary": <name>}.
     Falls back to single if detection fails.
+    Honors env override MINDNET_VECTOR_NAME, then QDRANT_VECTOR_NAME.
     """
+    # Env override for preferred name
+    prefer = os.environ.get("MINDNET_VECTOR_NAME") or os.environ.get("QDRANT_VECTOR_NAME")
     try:
         info = client.get_collection(collection_name=collection_name)
-        vecs = getattr(info, "vectors", None)
-        # Single-vector config
+        # qdrant-client reports the vector config at info.config.params.vectors;
+        # the top-level attribute is kept only as a fallback for other shapes.
+        params = getattr(getattr(info, "config", None), "params", None)
+        vecs = getattr(params, "vectors", None)
+        if vecs is None:
+            vecs = getattr(info, "vectors", None)
+        # Single-vector config (VectorParams)
         if hasattr(vecs, "size") and isinstance(vecs.size, int):
             return {"kind": "single", "size": vecs.size}
-        # Named-vectors config
-        cfg = getattr(vecs, "config", None)
-        if isinstance(cfg, dict) and cfg:
-            names = list(cfg.keys())
-            if names:
-                return {"kind": "named", "names": names}
+        # Try common mappings for named vectors
+        def _try_names(obj):
+            if obj is None:
+                return []
+            # direct dict
+            if isinstance(obj, dict):
+                return list(obj.keys())
+            # object with dict-like attributes
+            for cand in ("config", "configs", "vectors", "map", "data"):
+                sub = getattr(obj, cand, None)
+                if isinstance(sub, dict) and sub:
+                    return list(sub.keys())
+            # last resort: inspect __dict__ for dicts
+            try:
+                for v in getattr(obj, "__dict__", {}).values():
+                    if isinstance(v, dict) and v:
+                        return list(v.keys())
+            except Exception:
+                pass
+            return []
+        names = _try_names(vecs)
+        # Prefer known names if present
+        if names:
+            pref = None
+            if prefer and prefer in names:
+                pref = prefer
+            else:
+                for k in ("text", "default", "embedding", "content"):
+                    if k in names:
+                        pref = k
+                        break
+            if pref is None:
+                pref = sorted(names)[0]
+            return {"kind": "named", "names": names, "primary": pref}
     except Exception:
+        # Best effort: any client/schema error falls through to the single-vector default.
         pass
     return {"kind": "single", "size": None}
 
+
 def _coerce_points_for_collection(client, collection_name: str, points: list):
     """
-    If collection uses named vectors, wrap each point's .vector into .vectors{<name>: vector}.
+    If collection uses named vectors, wrap each point's .vector into .vector={name: vector}.
     Leaves points without vectors (e.g., edges) untouched.
     """
     try:
@@ -329,12 +368,18 @@ def _coerce_points_for_collection(client, collection_name: str, points: list):
         names = schema.get("names") or []
         if not names:
             return points
-        primary = names[0]
+        primary = schema.get("primary") or names[0]
         fixed = []
         for pt in points:
             vec = getattr(pt, "vector", None)
-            if vec is not None:
-                fixed.append(rest.PointStruct(id=pt.id, vectors={primary: vec}, payload=pt.payload))
+            # Some client builds may store named vectors in .vectors already; keep if correct
+            vectors_attr = getattr(pt, "vectors", None)
+            if isinstance(vectors_attr, dict) and vectors_attr:
+                fixed.append(pt)
+                continue
+            if vec is not None and not isinstance(vec, dict):
+                # convert to named map
+                fixed.append(rest.PointStruct(id=pt.id, vector={primary: vec}, payload=pt.payload))
             else:
                 fixed.append(pt)
         return fixed