121 lines
3.9 KiB
Python
121 lines
3.9 KiB
Python
# app/core/qdrant.py
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from dataclasses import dataclass
|
|
from typing import Optional
|
|
|
|
from qdrant_client import QdrantClient
|
|
from qdrant_client.http import models as rest
|
|
|
|
|
|
# -------------------------------
|
|
# Konfiguration
|
|
# -------------------------------
|
|
|
|
@dataclass
class QdrantConfig:
    """Connection and collection settings for Qdrant.

    Attributes:
        url: Base URL of the Qdrant server.
        api_key: API key, or ``None`` when none is configured.
        prefix: Prefix used when building collection names.
        dim: Vector dimension.
    """

    url: str
    api_key: Optional[str]
    prefix: str
    dim: int

    @staticmethod
    def _env(name: str, default: str) -> str:
        """Return the stripped value of env var *name*; *default* if unset or blank."""
        value = (os.getenv(name) or "").strip()
        return value or default

    @staticmethod
    def _env_int(name: str, default: str) -> int:
        """Read env var *name* as an integer.

        Raises:
            ValueError: If the value is not a valid integer. The message names
                the offending variable.
        """
        raw = QdrantConfig._env(name, default)
        try:
            return int(raw)
        except ValueError:
            raise ValueError(
                f"Environment variable {name} must be an integer, got {raw!r}"
            ) from None

    @staticmethod
    def from_env() -> "QdrantConfig":
        """Build a configuration from environment variables.

        Variables read:
            QDRANT_URL: Full server URL (preferred when set).
            QDRANT_HOST / QDRANT_PORT: Used to build ``http://host:port`` when
                QDRANT_URL is not set (defaults ``127.0.0.1`` / ``6333``).
            QDRANT_API_KEY: Optional API key; empty means no key.
            COLLECTION_PREFIX: Collection name prefix (default ``mindnet``).
            VECTOR_DIM: Vector dimension (default ``384``).

        A variable that is set but empty or whitespace-only is treated like an
        unset one, matching how QDRANT_URL and QDRANT_API_KEY were already
        handled.

        Raises:
            ValueError: If QDRANT_PORT or VECTOR_DIM is not an integer.
        """
        # URL (preferred) or host/port.
        url = QdrantConfig._env("QDRANT_URL", "")
        if not url:
            host = QdrantConfig._env("QDRANT_HOST", "127.0.0.1")
            port = QdrantConfig._env_int("QDRANT_PORT", "6333")
            url = f"http://{host}:{port}"
        api_key = os.getenv("QDRANT_API_KEY") or None

        # Collection prefix and vector dimension.
        prefix = QdrantConfig._env("COLLECTION_PREFIX", "mindnet")
        dim = QdrantConfig._env_int("VECTOR_DIM", "384")  # MiniLM-384 by default

        return QdrantConfig(url=url, api_key=api_key, prefix=prefix, dim=dim)
|
|
|
|
|
|
# -------------------------------
|
|
# Client / Setup
|
|
# -------------------------------
|
|
|
|
def get_client(cfg: QdrantConfig) -> QdrantClient:
    """Create a QdrantClient from the URL and API key in *cfg*."""
    client = QdrantClient(url=cfg.url, api_key=cfg.api_key)
    return client
|
|
|
|
|
|
def _collection_has_vectors(info: object) -> bool:
    """Return True if collection metadata *info* declares at least one vector."""
    # The previous code read ``info.result``; fall back to the object itself
    # when that attribute is absent (or None), as is the case when the client
    # returns the collection info directly.
    payload = getattr(info, "result", None)
    if payload is None:
        payload = info
    params = getattr(getattr(payload, "config", None), "params", None)
    vectors = getattr(params, "vectors", None)
    if vectors is None:
        return False
    # A mapping of named vectors with no entries is still vectorless.
    if isinstance(vectors, dict):
        return bool(vectors)
    return True


def _create_if_missing(client: QdrantClient, name: str, size: int, distance) -> None:
    """Create collection *name* with one vector config unless it already exists."""
    if not client.collection_exists(name):
        client.create_collection(
            collection_name=name,
            vectors_config=rest.VectorParams(size=size, distance=distance),
        )


def ensure_collections(client: QdrantClient, prefix: str, dim: int) -> None:
    """Make sure the three collections exist.

    - ``{prefix}_notes``  : vector size = ``dim`` (COSINE)
    - ``{prefix}_chunks`` : vector size = ``dim`` (COSINE)
    - ``{prefix}_edges``  : vector size = 1 (DOT) -- a dummy vector so the
      Python client does not insist on a 'vector'.

    If ``{prefix}_edges`` already exists without a vector configuration, it is
    deleted and recreated with the 1-D dummy vector. An edges collection that
    already has a vector configuration is left untouched.

    Args:
        client: Qdrant client used for all collection operations.
        prefix: Collection name prefix.
        dim: Vector size for the notes and chunks collections.
    """
    notes = f"{prefix}_notes"
    chunks = f"{prefix}_chunks"
    edges = f"{prefix}_edges"

    # Notes and chunks share the same vector configuration.
    for name in (notes, chunks):
        _create_if_missing(client, name, size=dim, distance=rest.Distance.COSINE)

    # Edges: 1-D dummy vector (workaround).
    if client.collection_exists(edges):
        try:
            usable = _collection_has_vectors(client.get_collection(edges))
        except Exception:
            # Deliberate fallback kept from the original: if the metadata
            # cannot be read, recreate the collection to be on the safe side.
            usable = False
        if not usable:
            client.delete_collection(edges)

    _create_if_missing(client, edges, size=1, distance=rest.Distance.DOT)
|
|
|
|
|
|
# -------------------------------
|
|
# (Optionale) Utility-Funktionen
|
|
# -------------------------------
|
|
|
|
def collection_names(prefix: str) -> tuple[str, str, str]:
    """Return the (notes, chunks, edges) collection names for *prefix*."""
    notes_name = prefix + "_notes"
    chunks_name = prefix + "_chunks"
    edges_name = prefix + "_edges"
    return (notes_name, chunks_name, edges_name)
|
|
|
|
|
|
def wipe_collections(client: QdrantClient, prefix: str) -> None:
    """Delete all three collections for *prefix*.

    Only use this when you deliberately want to start from scratch.
    Collections that do not exist are skipped.
    """
    for collection in collection_names(prefix):
        if not client.collection_exists(collection):
            continue
        client.delete_collection(collection)
|