121 lines
4.3 KiB
Python
121 lines
4.3 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Name: app/core/qdrant.py
|
||
Version: v1.3.0 (2025-09-05)
|
||
|
||
Kurzbeschreibung:
|
||
Qdrant-Client & Collection-Setup für mindnet.
|
||
- Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges vorhanden sind.
|
||
- **NEU:** ensure_collections(..., destructive=False) → keine Datenverluste im Dry-Run.
|
||
- Edges-Collection nutzt 1D Dummy-Vektor (Workaround für Python-Client).
|
||
|
||
API:
|
||
- QdrantConfig.from_env()
|
||
- get_client(cfg)
|
||
- ensure_collections(client, prefix, dim, destructive=False)
|
||
|
||
Änderungen:
|
||
v1.3.0: Destruktive Re-Creation von {prefix}_edges nur noch optional via destructive=True.
|
||
Default ist sicher (keine Löschung vorhandener Collections).
|
||
v1.2.x und älter: konnten {prefix}_edges automatisch löschen/re-anlegen.
|
||
|
||
Quellen:
|
||
- QdrantClient & REST-Modelle (qdrant-client)
|
||
- Mindnet Edge-Workaround (1D-Vektor) wie zuvor.
|
||
"""
|
||
from __future__ import annotations
|
||
import os
|
||
from dataclasses import dataclass
|
||
from typing import Optional
|
||
|
||
from qdrant_client import QdrantClient
|
||
from qdrant_client.http import models as rest
|
||
|
||
|
||
@dataclass
class QdrantConfig:
    """Connection and collection settings for Qdrant.

    Attributes:
        url: Base URL of the Qdrant server.
        api_key: API key passed to the client, or ``None`` when not set.
        prefix: Prefix used to build the collection names.
        dim: Vector size used when creating collections.
    """

    url: str
    api_key: Optional[str]
    prefix: str
    dim: int

    @staticmethod
    def _env_text(name: str, default: str) -> str:
        """Return the environment variable *name*, or *default* if unset or empty."""
        return os.getenv(name) or default

    @staticmethod
    def _env_int(name: str, default: int) -> int:
        """Return the environment variable *name* parsed as an integer.

        An unset or empty variable yields *default*.

        Raises:
            ValueError: If the variable is set to a non-integer value.
        """
        raw = os.getenv(name)
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError as err:
            raise ValueError(
                f"Environment variable {name} must be an integer, got {raw!r}"
            ) from err

    @classmethod
    def from_env(cls) -> "QdrantConfig":
        """Build a configuration from environment variables.

        Variables read (empty values are treated the same as unset ones):
            QDRANT_URL: Full server URL. When missing, the URL is assembled
                from QDRANT_HOST (default ``127.0.0.1``) and QDRANT_PORT
                (default ``6333``).
            QDRANT_API_KEY: Optional API key (default ``None``).
            COLLECTION_PREFIX: Collection name prefix (default ``mindnet``).
            VECTOR_DIM: Vector size (default ``384``).

        Returns:
            A new configuration instance.

        Raises:
            ValueError: If QDRANT_PORT or VECTOR_DIM is set to a non-integer
                value.
        """
        url = os.getenv("QDRANT_URL")
        if not url:
            # Host and port are only consulted when no explicit URL is given.
            host = cls._env_text("QDRANT_HOST", "127.0.0.1")
            port = cls._env_int("QDRANT_PORT", 6333)
            url = f"http://{host}:{port}"
        api_key = os.getenv("QDRANT_API_KEY") or None
        prefix = cls._env_text("COLLECTION_PREFIX", "mindnet")
        dim = cls._env_int("VECTOR_DIM", 384)
        return cls(url=url, api_key=api_key, prefix=prefix, dim=dim)
|
||
|
||
|
||
def get_client(cfg: QdrantConfig) -> QdrantClient:
    """Construct a Qdrant client from a configuration object.

    Args:
        cfg: Configuration whose ``url`` and ``api_key`` are handed to the
            client constructor.

    Returns:
        A new ``QdrantClient`` instance.
    """
    connection = {"url": cfg.url, "api_key": cfg.api_key}
    return QdrantClient(**connection)
|
||
|
||
|
||
def _create_notes(client: QdrantClient, name: str, dim: int) -> None:
    """Create collection *name* unless it already exists.

    Args:
        client: Client used to query and create the collection.
        name: Name of the collection to create.
        dim: Vector size for the new collection.
    """
    if client.collection_exists(name):
        # Existing collections are left untouched.
        return

    vector_params = rest.VectorParams(size=dim, distance=rest.Distance.COSINE)
    client.create_collection(collection_name=name, vectors_config=vector_params)
|
||
|
||
|
||
def _create_chunks(client: QdrantClient, name: str, dim: int) -> None:
    """Create collection *name* unless it already exists.

    Args:
        client: Client used to query and create the collection.
        name: Name of the collection to create.
        dim: Vector size for the new collection.
    """
    already_present = client.collection_exists(name)
    if already_present:
        # Existing collections are left untouched.
        return

    cosine_vectors = rest.VectorParams(size=dim, distance=rest.Distance.COSINE)
    client.create_collection(collection_name=name, vectors_config=cosine_vectors)
|
||
|
||
|
||
def _create_edges(client: QdrantClient, name: str) -> None:
    """Create collection *name* with a 1-D dummy vector unless it exists.

    Args:
        client: Client used to query and create the collection.
        name: Name of the collection to create.
    """
    if client.collection_exists(name):
        # Existing collections are left untouched.
        return

    # Size-1 vector with dot-product distance serves as a dummy vector config.
    dummy_vector = rest.VectorParams(size=1, distance=rest.Distance.DOT)
    client.create_collection(collection_name=name, vectors_config=dummy_vector)
|
||
|
||
|
||
def ensure_collections(client: QdrantClient, prefix: str, dim: int, destructive: bool = False) -> None:
    """Make sure the ``{prefix}_notes``, ``{prefix}_chunks`` and
    ``{prefix}_edges`` collections exist.

    The notes and chunks collections are created when missing. The edges
    collection is created when missing; when it already exists, its vector
    configuration is inspected:

    - If it has a vector configuration, nothing is changed.
    - If it has none and ``destructive`` is true, the collection is deleted
      and re-created.
    - If it has none and ``destructive`` is false (the default), the
      collection is kept and a warning is printed, so no data is lost.
    - If the collection info cannot be read at all, the collection is left
      untouched.

    Args:
        client: Client used to query, create and delete collections.
        prefix: Prefix used to build the three collection names.
        dim: Vector size for the notes and chunks collections.
        destructive: Allow deleting and re-creating an edges collection that
            has no vector configuration.
    """
    notes = f"{prefix}_notes"
    chunks = f"{prefix}_chunks"
    edges = f"{prefix}_edges"

    _create_notes(client, notes, dim)
    _create_chunks(client, chunks, dim)

    if not client.collection_exists(edges):
        _create_edges(client, edges)
        return

    # Check whether the existing edges collection has a vector configuration.
    try:
        info = client.get_collection(edges)
        # The client returns the collection info object itself; reading
        # `info.result` unconditionally raised AttributeError, which the
        # handler below swallowed, so a missing vector config was never
        # detected. A `.result` wrapper is still honoured if one is present.
        payload = getattr(info, "result", None)
        if payload is None:
            payload = info
        params = getattr(getattr(payload, "config", None), "params", None)
        has_vectors = getattr(params, "vectors", None) is not None
    except Exception:
        # Deliberately conservative: if the lookup fails, touch nothing.
        has_vectors = True

    if has_vectors:
        # Collection is usable as-is.
        return

    if destructive:
        client.delete_collection(edges)
        _create_edges(client, edges)
    else:
        # Safe default: keep the collection and only warn, so no data is lost.
        print(f"[ensure_collections] WARN: '{edges}' ohne VectorConfig gefunden; "
              f"keine destruktive Änderung (destructive=False).", flush=True)
|