diff --git a/scripts/import_markdown.py b/scripts/import_markdown.py index 688ac4d..e9fed5c 100644 --- a/scripts/import_markdown.py +++ b/scripts/import_markdown.py @@ -152,8 +152,8 @@ except Exception: def _upsert_batch(client, collection_name: str, points: list): if not points: return - pts = _coerce_points_for_collection(client, collection_name, points) - client.upsert(collection_name=collection_name, points=pts, wait=True) + # Collections are single-vector (size/distance) per original setup script. + client.upsert(collection_name=collection_name, points=points, wait=True) # Type-Registry (optional) try: @@ -293,93 +293,6 @@ def _resolve_dim(cfg) -> int: return 384 -# ---- Qdrant vector schema detection & point coercion ---- - -def _get_vector_schema(client, collection_name: str): - """ - Returns dict describing vector schema: - {"kind": "single", "size": } or {"kind": "named", "names": [..]}. - Fallbacks to single if detection fails. Honors env override MINDNET_VECTOR_NAME. - """ - # Env override for preferred name - prefer = os.environ.get("MINDNET_VECTOR_NAME") or os.environ.get("QDRANT_VECTOR_NAME") - try: - info = client.get_collection(collection_name=collection_name) - vecs = getattr(info, "vectors", None) - # Single-vector config (VectorParams) - if hasattr(vecs, "size") and isinstance(vecs.size, int): - return {"kind": "single", "size": vecs.size} - # Try common mappings for named vectors - def _try_names(obj): - if obj is None: - return [] - # direct dict - if isinstance(obj, dict): - return list(obj.keys()) - # object with dict-like attributes - for cand in ("config", "configs", "vectors", "map", "data"): - sub = getattr(obj, cand, None) - if isinstance(sub, dict) and sub: - return list(sub.keys()) - # last resort: inspect __dict__ for dicts - try: - for v in getattr(obj, "__dict__", {}).values(): - if isinstance(v, dict) and v: - return list(v.keys()) - except Exception: - pass - return [] - names = _try_names(vecs) - # Prefer known names if present - if names: - pref = None - if prefer and prefer in names: - pref = prefer - else: - for k in ("text", "default", "embedding", "content"): - if k in names: - pref = k - break - if pref is None: - pref = sorted(names)[0] - return {"kind": "named", "names": names, "primary": pref} - except Exception: - pass - return {"kind": "single", "size": None} - - - -def _coerce_points_for_collection(client, collection_name: str, points: list): - """ - If collection uses named vectors, wrap each point's .vector into .vector={name: vector}. - Leaves points without vectors (e.g., edges) untouched. - """ - try: - schema = _get_vector_schema(client, collection_name) - if schema.get("kind") != "named": - return points - names = schema.get("names") or [] - if not names: - return points - primary = schema.get("primary") or names[0] - fixed = [] - for pt in points: - vec = getattr(pt, "vector", None) - # Some client builds may store named vectors in .vectors already; keep if correct - vectors_attr = getattr(pt, "vectors", None) - if isinstance(vectors_attr, dict) and vectors_attr: - fixed.append(pt) - continue - if vec is not None and not isinstance(vec, dict): - # convert to named map - fixed.append(rest.PointStruct(id=pt.id, vector={primary: vec}, payload=pt.payload)) - else: - fixed.append(pt) - return fixed - except Exception: - return points - - # ---- Compatibility wrappers (no direct calls to project-specific signatures) ---- def _ensure_collections_compat(client, cfg, dim): """ @@ -707,4 +620,4 @@ def main() -> None: if __name__ == "__main__": - main() + main() \ No newline at end of file