mindnet/app/core/qdrant.py
Lars 98f719e42f
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s
app/core/qdrant.py aktualisiert
2025-09-04 08:37:10 +02:00

121 lines
3.9 KiB
Python

# app/core/qdrant.py
from __future__ import annotations
import os
from dataclasses import dataclass
from typing import Optional
from qdrant_client import QdrantClient
from qdrant_client.http import models as rest
# -------------------------------
# Configuration
# -------------------------------
@dataclass
class QdrantConfig:
    """Connection and naming settings for the Qdrant vector store."""

    # Base URL of the Qdrant server, e.g. "http://127.0.0.1:6333".
    url: str
    # API key handed to the client; None when no key is configured.
    api_key: Optional[str]
    # Prefix used to build the collection names ("{prefix}_notes", ...).
    prefix: str
    # Dimension of the embedding vectors.
    dim: int

    @classmethod
    def from_env(cls) -> "QdrantConfig":
        """Build a configuration from environment variables.

        Variables read (a variable that is set but empty counts as unset):

        - ``QDRANT_URL``: full server URL; takes precedence over host/port.
        - ``QDRANT_HOST`` / ``QDRANT_PORT``: used to build ``http://host:port``
          when ``QDRANT_URL`` is missing (defaults ``127.0.0.1`` / ``6333``).
        - ``QDRANT_API_KEY``: optional API key (default: ``None``).
        - ``COLLECTION_PREFIX``: collection name prefix (default ``mindnet``).
        - ``VECTOR_DIM``: vector dimension (default ``384``).

        Returns:
            A populated ``QdrantConfig``.

        Raises:
            ValueError: if ``QDRANT_PORT`` or ``VECTOR_DIM`` is non-empty but
                not an integer.
        """
        # `or default` instead of getenv's default argument: the latter only
        # applies when the variable is missing, so `QDRANT_PORT=` (empty, as
        # commonly produced by .env / compose files) would crash in int("").
        url = os.getenv("QDRANT_URL")
        if not url:
            host = os.getenv("QDRANT_HOST") or "127.0.0.1"
            port = int(os.getenv("QDRANT_PORT") or "6333")
            url = f"http://{host}:{port}"

        api_key = os.getenv("QDRANT_API_KEY") or None

        # Collection prefix and vector dimension.
        prefix = os.getenv("COLLECTION_PREFIX") or "mindnet"
        dim = int(os.getenv("VECTOR_DIM") or "384")  # MiniLM-384 by default

        return cls(url=url, api_key=api_key, prefix=prefix, dim=dim)
# -------------------------------
# Client / Setup
# -------------------------------
def get_client(cfg: QdrantConfig) -> QdrantClient:
    """Build a QdrantClient for the server described by *cfg*.

    The client is constructed from ``cfg.url`` and ``cfg.api_key``.
    """
    endpoint = cfg.url
    key = cfg.api_key
    return QdrantClient(url=endpoint, api_key=key)
def _has_vector_config(info: object) -> bool:
    """Return True when a collection-info object declares at least one vector."""
    # QdrantClient.get_collection() hands back the collection info itself;
    # a raw HTTP response wraps the same data in `.result`. Accept both.
    wrapped = getattr(info, "result", None)
    payload = info if wrapped is None else wrapped
    params = getattr(getattr(payload, "config", None), "params", None)
    vectors = getattr(params, "vectors", None)
    if vectors is None:
        return False
    # Named-vector configs are mappings; an empty mapping means "no vectors".
    if isinstance(vectors, dict) and not vectors:
        return False
    return True


def _create_if_missing(client: QdrantClient, name: str, size: int, distance: rest.Distance) -> None:
    """Create collection *name* with a single vector config unless it exists."""
    if not client.collection_exists(name):
        client.create_collection(
            collection_name=name,
            vectors_config=rest.VectorParams(size=size, distance=distance),
        )


def ensure_collections(client: QdrantClient, prefix: str, dim: int) -> None:
    """Make sure the three project collections exist.

    - ``{prefix}_notes``  : vector size ``dim``, COSINE distance
    - ``{prefix}_chunks`` : vector size ``dim``, COSINE distance
    - ``{prefix}_edges``  : vector size 1, DOT distance. This is a dummy vector
      so that the Python client does not insist on a real ``vector``.

    If ``{prefix}_edges`` already exists without any vector configuration it is
    deleted and recreated with the 1-dimensional dummy vector. An edges
    collection that already has a vector configuration is left untouched.

    Args:
        client: Connected Qdrant client.
        prefix: Collection name prefix.
        dim: Vector dimension for the notes and chunks collections.
    """
    _create_if_missing(client, f"{prefix}_notes", dim, rest.Distance.COSINE)
    _create_if_missing(client, f"{prefix}_chunks", dim, rest.Distance.COSINE)

    # Edges: 1-D dummy vector (workaround, see docstring).
    edges = f"{prefix}_edges"
    if client.collection_exists(edges):
        # Only the network call sits inside the try. The previous version also
        # read `info.result` there; that attribute does not exist on the object
        # returned by QdrantClient.get_collection(), so the resulting
        # AttributeError was swallowed and the edges collection was dropped
        # and recreated on every call.
        try:
            info = client.get_collection(edges)
        except Exception:
            # Deliberate best-effort kept from the original: if the metadata
            # cannot be read, recreate the collection to be on the safe side.
            # NOTE(review): this also fires on transient errors - confirm that
            # losing the edges data in that case is acceptable.
            recreate = True
        else:
            recreate = not _has_vector_config(info)
        if recreate:
            client.delete_collection(edges)

    _create_if_missing(client, edges, 1, rest.Distance.DOT)
# -------------------------------
# (Optional) utility functions
# -------------------------------
def collection_names(prefix: str) -> tuple[str, str, str]:
    """Return the (notes, chunks, edges) collection names for *prefix*.

    Convenience helper for callers that need all three names in one place.
    """
    stem = f"{prefix}_"
    return (stem + "notes", stem + "chunks", stem + "edges")
def wipe_collections(client: QdrantClient, prefix: str) -> None:
    """Delete the notes, chunks and edges collections for *prefix*.

    Destructive: only use this when you deliberately want to start from
    scratch. Collections that do not exist are skipped.
    """
    for collection in collection_names(prefix):
        if not client.collection_exists(collection):
            continue
        client.delete_collection(collection)