Dateien nach "app/core" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
This commit is contained in:
parent
b8126e00c4
commit
ead1b8c1bc
|
|
@ -1,14 +1,18 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
app/core/qdrant_points.py — robust points helpers for Qdrant
|
||||
app/core/qdrant_points.py - robust points helpers for Qdrant
|
||||
|
||||
- Single source of truth for building PointStruct for notes/chunks/edges
|
||||
- Backward-compatible to older payload schemas for edges
|
||||
- NEW: Upsert path auto-detects collection vector schema (single vs named vectors)
|
||||
and coerces points accordingly to avoid 'Not existing vector name' errors.
|
||||
- Backward-compatible payloads for edges
|
||||
- Handles both Single-Vector and Named-Vector collections
|
||||
- Deterministic overrides via ENV to avoid auto-detection traps:
|
||||
* NOTES_VECTOR_NAME, CHUNKS_VECTOR_NAME, EDGES_VECTOR_NAME
|
||||
* MINDNET_VECTOR_NAME (fallback)
|
||||
> Set to a concrete name (e.g. "text") to force Named-Vector with that name
|
||||
> Set to "__single__" (or "single") to force Single-Vector
|
||||
|
||||
Version: 1.4.0 (2025-11-08)
|
||||
Version: 1.5.0 (2025-11-08)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import os
|
||||
|
|
@ -21,16 +25,14 @@ from qdrant_client import QdrantClient
|
|||
# --------------------- ID helpers ---------------------
|
||||
|
||||
def _to_uuid(stable_key: str) -> str:
    """Return a deterministic UUIDv5 string for ``stable_key``.

    The key is hashed in the ``uuid.NAMESPACE_URL`` namespace, so the same
    key always maps to the same id.
    """
    return str(uuid.uuid5(uuid.NAMESPACE_URL, stable_key))
|
||||
|
||||
def _names(prefix: str) -> Tuple[str, str, str]:
    """Return the ``(notes, chunks, edges)`` collection names for ``prefix``."""
    return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges"
|
||||
|
||||
# --------------------- Notes / Chunks ---------------------
|
||||
# --------------------- Points builders ---------------------
|
||||
|
||||
def points_for_note(prefix: str, note_payload: dict, note_vec: List[float] | None, dim: int) -> Tuple[str, List[rest.PointStruct]]:
|
||||
"""Notes-Collection: if no note embedding -> zero vector of length dim."""
|
||||
notes_col, _, _ = _names(prefix)
|
||||
vector = note_vec if note_vec is not None else [0.0] * int(dim)
|
||||
raw_note_id = note_payload.get("note_id") or note_payload.get("id") or "missing-note-id"
|
||||
|
|
@ -39,7 +41,6 @@ def points_for_note(prefix: str, note_payload: dict, note_vec: List[float] | Non
|
|||
return notes_col, [pt]
|
||||
|
||||
def points_for_chunks(prefix: str, chunk_payloads: List[dict], vectors: List[List[float]]) -> Tuple[str, List[rest.PointStruct]]:
|
||||
"""Create point structs for the chunk collection (expects one vector per chunk)."""
|
||||
_, chunks_col, _ = _names(prefix)
|
||||
points: List[rest.PointStruct] = []
|
||||
for i, (pl, vec) in enumerate(zip(chunk_payloads, vectors), start=1):
|
||||
|
|
@ -52,10 +53,7 @@ def points_for_chunks(prefix: str, chunk_payloads: List[dict], vectors: List[Lis
|
|||
points.append(rest.PointStruct(id=point_id, vector=vec, payload=pl))
|
||||
return chunks_col, points
|
||||
|
||||
# --------------------- Edges ---------------------
|
||||
|
||||
def _normalize_edge_payload(pl: dict) -> dict:
|
||||
"""Normalize edge payload keys to a common schema."""
|
||||
kind = pl.get("kind") or pl.get("edge_type") or "edge"
|
||||
source_id = pl.get("source_id") or pl.get("src_id") or "unknown-src"
|
||||
target_id = pl.get("target_id") or pl.get("dst_id") or "unknown-tgt"
|
||||
|
|
@ -69,7 +67,6 @@ def _normalize_edge_payload(pl: dict) -> dict:
|
|||
return pl
|
||||
|
||||
def points_for_edges(prefix: str, edge_payloads: List[dict]) -> Tuple[str, List[rest.PointStruct]]:
|
||||
"""Edges collection (1D dummy vector)."""
|
||||
_, _, edges_col = _names(prefix)
|
||||
points: List[rest.PointStruct] = []
|
||||
for raw in edge_payloads:
|
||||
|
|
@ -86,32 +83,47 @@ def points_for_edges(prefix: str, edge_payloads: List[dict]) -> Tuple[str, List[
|
|||
points.append(rest.PointStruct(id=point_id, vector=[0.0], payload=pl))
|
||||
return edges_col, points
|
||||
|
||||
# --------------------- Vector schema detection ---------------------
|
||||
# --------------------- Vector schema & overrides ---------------------
|
||||
|
||||
def _preferred_name(candidates: List[str]) -> str:
    """Pick the vector name to use out of ``candidates``.

    Selection order:
      1. The first of NOTES_VECTOR_NAME, CHUNKS_VECTOR_NAME, EDGES_VECTOR_NAME,
         MINDNET_VECTOR_NAME, QDRANT_VECTOR_NAME whose value (as given, or with
         surrounding whitespace removed) is one of the candidates.
      2. The first of "text", "default", "embedding", "content" that is a
         candidate.
      3. The alphabetically first candidate.

    Raises:
        IndexError: if ``candidates`` is empty.
    """
    if not candidates:
        # Same exception type that sorted([])[0] produced, but with a message.
        raise IndexError("no candidate vector names to choose from")

    env_keys = (
        "NOTES_VECTOR_NAME",
        "CHUNKS_VECTOR_NAME",
        "EDGES_VECTOR_NAME",
        "MINDNET_VECTOR_NAME",
        "QDRANT_VECTOR_NAME",
    )
    for key in env_keys:
        raw = os.getenv(key)
        if not raw:
            continue
        # Try the raw value first (previous behavior), then the stripped one so
        # that values padded with whitespace still match.
        for value in (raw, raw.strip()):
            if value and value in candidates:
                return value

    for common in ("text", "default", "embedding", "content"):
        if common in candidates:
            return common
    return sorted(candidates)[0]
|
||||
|
||||
def _env_override_for_collection(collection: str) -> Optional[str]:
    """Resolve the ENV-based vector override for ``collection``.

    The collection-specific variable is chosen by name suffix
    (``_notes`` -> NOTES_VECTOR_NAME, ``_chunks`` -> CHUNKS_VECTOR_NAME,
    ``_edges`` -> EDGES_VECTOR_NAME). If it is unset, empty, or only
    whitespace, MINDNET_VECTOR_NAME is used instead.

    Returns:
        - "__single__" to force single-vector (value "__single__" or "single",
          case-insensitive)
        - a concrete name (str) to force named-vector with that name
        - None to auto-detect (no usable override set)
    """
    suffix_to_env = (
        ("_notes", "NOTES_VECTOR_NAME"),
        ("_chunks", "CHUNKS_VECTOR_NAME"),
        ("_edges", "EDGES_VECTOR_NAME"),
    )
    specific = ""
    for suffix, env_key in suffix_to_env:
        if collection.endswith(suffix):
            # `or ""` so that a variable set to an empty string does not block
            # the MINDNET_VECTOR_NAME fallback below.
            specific = (os.getenv(env_key) or "").strip()
            break

    val = specific or (os.getenv("MINDNET_VECTOR_NAME") or "").strip()
    if not val:
        return None
    if val.lower() in ("__single__", "single"):
        return "__single__"
    return val  # concrete name
|
||||
|
||||
def _get_vector_schema(client: QdrantClient, collection_name: str) -> dict:
|
||||
"""Return {"kind": "single", "size": int} or {"kind": "named", "names": [...], "primary": str}."""
|
||||
"""
|
||||
Return {"kind": "single", "size": int} or {"kind": "named", "names": [...], "primary": str}.
|
||||
"""
|
||||
try:
|
||||
info = client.get_collection(collection_name=collection_name)
|
||||
vecs = getattr(info, "vectors", None)
|
||||
# Single-vector config
|
||||
if hasattr(vecs, "size") and isinstance(vecs.size, int):
|
||||
return {"kind": "single", "size": vecs.size}
|
||||
# Named-vectors config (dict-like in .config)
|
||||
cfg = getattr(vecs, "config", None)
|
||||
if isinstance(cfg, dict) and cfg:
|
||||
names = list(cfg.keys())
|
||||
|
|
@ -121,67 +133,51 @@ def _get_vector_schema(client: QdrantClient, collection_name: str) -> dict:
|
|||
pass
|
||||
return {"kind": "single", "size": None}
|
||||
|
||||
def _coerce_for_collection(client: QdrantClient, collection_name: str, points: List[rest.PointStruct]) -> List[rest.PointStruct]:
    """If the collection uses named vectors, convert vector=[...] -> vector={name: [...]}.

    Best-effort: when the schema is not "named", has no primary name, or
    anything fails along the way, the input ``points`` are returned unchanged.
    Points whose vector is already a dict are kept as they are.
    """
    try:
        schema = _get_vector_schema(client, collection_name)
        if schema.get("kind") != "named":
            return points
        primary = schema.get("primary")
        if not primary:
            return points
        fixed: List[rest.PointStruct] = []
        for pt in points:
            vec = getattr(pt, "vector", None)
            if isinstance(vec, dict) or vec is None:
                # already named, or no vector at all: pass through untouched
                fixed.append(pt)
            else:
                fixed.append(rest.PointStruct(id=pt.id, vector={primary: vec}, payload=pt.payload))
        return fixed
    except Exception:
        # Deliberate best-effort: never let coercion itself block the upsert.
        return points


def _as_named(points: List[rest.PointStruct], name: str) -> List[rest.PointStruct]:
    """Return ``points`` with every vector addressed under the vector name ``name``.

    - vector is a dict that already has ``name``: point is kept as is
    - vector is a dict without ``name``: its first entry is re-keyed to
      ``name``; an empty (or unusable) dict falls back to ``[0.0]``
    - vector is a plain value: wrapped as ``{name: vector}``
    - vector is None: point is kept as is
    """
    out: List[rest.PointStruct] = []
    for pt in points:
        vec = getattr(pt, "vector", None)
        if vec is None or (isinstance(vec, dict) and name in vec):
            out.append(pt)
            continue
        if isinstance(vec, dict):
            # take any existing entry; if empty dict fallback to [0.0]
            try:
                vec = list(next(iter(vec.values())))
            except (StopIteration, TypeError):
                vec = [0.0]
        out.append(rest.PointStruct(id=pt.id, vector={name: vec}, payload=pt.payload))
    return out
|
||||
|
||||
|
||||
def _try_upsert_with_names(client: QdrantClient, collection: str, points: List[rest.PointStruct]) -> None:
    """Retry an upsert against a named-vector collection, one vector name at a time.

    The names reported by ``_get_vector_schema`` are tried with the preferred
    one (see ``_preferred_name``) first; the function returns after the first
    upsert that succeeds.

    Raises:
        The exception the caller is currently handling, if this is invoked from
        inside an ``except`` block (this is what the bare ``raise`` statements
        relied on). Otherwise the last upsert error, or RuntimeError when the
        collection exposes no named vectors to try.
    """
    import sys  # local import: only needed to detect an in-flight exception

    # A bare `raise` is only valid while an exception is being handled; remember
    # whether that is the case so a standalone call fails with a useful error.
    in_handler = sys.exc_info()[1] is not None

    schema = _get_vector_schema(client, collection)
    names = list(schema.get("names") or []) if schema.get("kind") == "named" else []
    if not names:
        if in_handler:
            raise  # propagate the upsert error the caller is handling
        raise RuntimeError(f"collection {collection!r} exposes no named vectors to retry with")

    # prefer env-defined names first
    pref = _preferred_name(names)
    order = [pref] + [n for n in names if n != pref]

    last_error: Optional[Exception] = None
    for name in order:
        try:
            # _as_named also re-keys dict vectors that lack `name`, instead of
            # nesting the whole dict under it.
            client.upsert(collection_name=collection, points=_as_named(points, name), wait=True)
        except Exception as err:  # best-effort: try the next vector name
            last_error = err
            continue
        return

    if in_handler:
        raise  # every name failed: surface the caller's original error
    assert last_error is not None  # `order` is non-empty, so a failure was recorded
    raise last_error
|
||||
# --------------------- Qdrant ops ---------------------
|
||||
|
||||
def upsert_batch(client: QdrantClient, collection: str, points: List[rest.PointStruct]) -> None:
    """Upsert ``points`` into ``collection`` using the matching vector layout.

    Resolution order (exactly one ``client.upsert`` call is made, with
    ``wait=True``):
      1. ENV override from ``_env_override_for_collection``:
         "__single__" sends the points unchanged, any other string sends them
         as named vectors under that name.
      2. Auto-detection via ``_get_vector_schema``: a "named" schema uses its
         primary name, or ``_preferred_name`` over the reported names.
      3. Otherwise the points are sent unchanged (single-vector).

    Does nothing when ``points`` is empty. Errors from the client propagate.
    """
    if not points:
        return

    # 1) ENV overrides come first
    override = _env_override_for_collection(collection)
    if override == "__single__":
        to_send = points
    elif isinstance(override, str):
        to_send = _as_named(points, override)
    else:
        # 2) Auto-detect schema
        schema = _get_vector_schema(client, collection)
        if schema.get("kind") == "named":
            name = schema.get("primary") or _preferred_name(schema.get("names") or [])
            to_send = _as_named(points, name)
        else:
            # 3) Fallback single-vector
            to_send = points

    client.upsert(collection_name=collection, points=to_send, wait=True)
|
||||
|
||||
# --- Optional search helpers ---
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user