diff --git a/app/core/qdrant_points.py b/app/core/qdrant_points.py index 64a4fd2..4c3b9fe 100644 --- a/app/core/qdrant_points.py +++ b/app/core/qdrant_points.py @@ -1,14 +1,18 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -app/core/qdrant_points.py — robust points helpers for Qdrant +app/core/qdrant_points.py - robust points helpers for Qdrant - Single source of truth for building PointStruct for notes/chunks/edges -- Backward-compatible to older payload schemas for edges -- NEW: Upsert path auto-detects collection vector schema (single vs named vectors) - and coerces points accordingly to avoid 'Not existing vector name' errors. +- Backward-compatible payloads for edges +- Handles both Single-Vector and Named-Vector collections +- Deterministic overrides via ENV to avoid auto-detection traps: + * NOTES_VECTOR_NAME, CHUNKS_VECTOR_NAME, EDGES_VECTOR_NAME + * MINDNET_VECTOR_NAME (fallback) + > Set to a concrete name (e.g. "text") to force Named-Vector with that name + > Set to "__single__" (or "single") to force Single-Vector -Version: 1.4.0 (2025-11-08) +Version: 1.5.0 (2025-11-08) """ from __future__ import annotations import os @@ -21,16 +25,14 @@ from qdrant_client import QdrantClient # --------------------- ID helpers --------------------- def _to_uuid(stable_key: str) -> str: - """Deterministic UUIDv5 from a stable string key.""" return str(uuid.uuid5(uuid.NAMESPACE_URL, stable_key)) def _names(prefix: str) -> Tuple[str, str, str]: return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges" -# --------------------- Notes / Chunks --------------------- +# --------------------- Points builders --------------------- def points_for_note(prefix: str, note_payload: dict, note_vec: List[float] | None, dim: int) -> Tuple[str, List[rest.PointStruct]]: - """Notes-Collection: if no note embedding -> zero vector of length dim.""" notes_col, _, _ = _names(prefix) vector = note_vec if note_vec is not None else [0.0] * int(dim) raw_note_id = 
note_payload.get("note_id") or note_payload.get("id") or "missing-note-id" @@ -39,7 +41,6 @@ def points_for_note(prefix: str, note_payload: dict, note_vec: List[float] | Non return notes_col, [pt] def points_for_chunks(prefix: str, chunk_payloads: List[dict], vectors: List[List[float]]) -> Tuple[str, List[rest.PointStruct]]: - """Create point structs for the chunk collection (expects one vector per chunk).""" _, chunks_col, _ = _names(prefix) points: List[rest.PointStruct] = [] for i, (pl, vec) in enumerate(zip(chunk_payloads, vectors), start=1): @@ -52,10 +53,7 @@ def points_for_chunks(prefix: str, chunk_payloads: List[dict], vectors: List[Lis points.append(rest.PointStruct(id=point_id, vector=vec, payload=pl)) return chunks_col, points -# --------------------- Edges --------------------- - def _normalize_edge_payload(pl: dict) -> dict: - """Normalize edge payload keys to a common schema.""" kind = pl.get("kind") or pl.get("edge_type") or "edge" source_id = pl.get("source_id") or pl.get("src_id") or "unknown-src" target_id = pl.get("target_id") or pl.get("dst_id") or "unknown-tgt" @@ -69,7 +67,6 @@ def _normalize_edge_payload(pl: dict) -> dict: return pl def points_for_edges(prefix: str, edge_payloads: List[dict]) -> Tuple[str, List[rest.PointStruct]]: - """Edges collection (1D dummy vector).""" _, _, edges_col = _names(prefix) points: List[rest.PointStruct] = [] for raw in edge_payloads: @@ -86,32 +83,47 @@ def points_for_edges(prefix: str, edge_payloads: List[dict]) -> Tuple[str, List[ points.append(rest.PointStruct(id=point_id, vector=[0.0], payload=pl)) return edges_col, points -# --------------------- Vector schema detection --------------------- +# --------------------- Vector schema & overrides --------------------- def _preferred_name(candidates: List[str]) -> str: - """Pick a preferred vector name using env overrides then common fallbacks.""" - env_prefs = [ - os.getenv("NOTES_VECTOR_NAME"), - os.getenv("CHUNKS_VECTOR_NAME"), - 
def _env_override_for_collection(collection: str) -> Optional[str]:
    """Resolve the vector-name override configured through environment variables.

    ``MINDNET_VECTOR_NAME`` is the general fallback. When the collection name
    ends in ``_notes``, ``_chunks`` or ``_edges``, the matching
    ``NOTES_VECTOR_NAME`` / ``CHUNKS_VECTOR_NAME`` / ``EDGES_VECTOR_NAME``
    variable replaces the fallback whenever that variable is set.

    Args:
        collection: Name of the Qdrant collection being written to.

    Returns:
        ``"__single__"`` when the value is ``__single__`` or ``single``
        (case-insensitive), meaning single-vector mode is forced; the stripped
        value when a concrete vector name is configured; ``None`` when nothing
        usable is configured (unset, empty, or whitespace only), meaning the
        caller should auto-detect.
    """
    raw = os.getenv("MINDNET_VECTOR_NAME")
    for suffix, env_var in (
        ("_notes", "NOTES_VECTOR_NAME"),
        ("_chunks", "CHUNKS_VECTOR_NAME"),
        ("_edges", "EDGES_VECTOR_NAME"),
    ):
        if collection.endswith(suffix):
            # os.getenv only uses the default when the variable is unset, so a
            # per-collection variable that is set (even to "") wins over the
            # general fallback.
            raw = os.getenv(env_var, raw)
            break

    # Strip BEFORE the emptiness check: a whitespace-only value must count as
    # "not configured" instead of becoming the vector name "".
    value = (raw or "").strip()
    if not value:
        return None
    if value.lower() in ("__single__", "single"):
        return "__single__"
    return value
def _as_named(points: List[rest.PointStruct], name: str) -> List[rest.PointStruct]:
    """Return ``points`` with every vector addressed under the vector name ``name``.

    Args:
        points: Points to convert; the input list and its items are not mutated.
        name: Vector name each returned point should use.

    Returns:
        A new list, in input order. A point is passed through unchanged when
        its ``vector`` is a dict that already contains ``name`` or when it has
        no vector. Otherwise a new ``rest.PointStruct`` with the same ``id``
        and ``payload`` is created whose vector is ``{name: <values>}``.
    """
    out: List[rest.PointStruct] = []
    for pt in points:
        vec = getattr(pt, "vector", None)

        # No vector, or already keyed by the requested name: nothing to do.
        if vec is None or (isinstance(vec, dict) and name in vec):
            out.append(pt)
            continue

        if isinstance(vec, dict):
            # Keyed by other name(s): reuse the first entry. An empty dict, or
            # an entry that cannot be turned into a list, falls back to the
            # [0.0] placeholder so that conversion stays best-effort.
            first = next(iter(vec.values()), None)
            try:
                values = list(first) if first is not None else [0.0]
            except TypeError:
                values = [0.0]
        else:
            values = vec

        out.append(rest.PointStruct(id=pt.id, vector={name: values}, payload=pt.payload))
    return out
# --------------------- Qdrant ops ---------------------

def upsert_batch(client: QdrantClient, collection: str, points: List[rest.PointStruct]) -> None:
    """Upsert ``points`` into ``collection`` using the matching vector layout.

    The vector layout is resolved in this order:

    1. Environment override from ``_env_override_for_collection``:
       ``"__single__"`` sends the points unchanged, any other non-empty value
       is used as the vector name.
    2. Schema reported by ``_get_vector_schema``: for a ``"named"`` schema the
       ``primary`` name is used, else ``_preferred_name`` picks one of ``names``.
    3. Otherwise the points are sent unchanged (single-vector).

    Args:
        client: Qdrant client used for the upsert.
        collection: Target collection name.
        points: Points to write; an empty or ``None`` batch is a no-op.

    Raises:
        Whatever ``client.upsert`` raises; errors are not swallowed here.
    """
    if not points:
        return

    vector_name = None  # None means: send the points as-is (single-vector)
    override = _env_override_for_collection(collection)
    if override == "__single__":
        pass
    elif override:
        # Truthiness check on purpose: an empty string is not a usable name.
        vector_name = override
    else:
        schema = _get_vector_schema(client, collection)
        if schema.get("kind") == "named":
            names = schema.get("names") or []
            # Only consult _preferred_name when there is something to choose
            # from; it indexes into its argument and fails on an empty list.
            vector_name = schema.get("primary") or (_preferred_name(names) if names else None)

    batch = points if vector_name is None else _as_named(points, vector_name)
    client.upsert(collection_name=collection, points=batch, wait=True)

# --- Optional search helpers ---