mindnet/app/core/qdrant.py
Lars fd30e2c026
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 1s
app/core/qdrant.py aktualisiert
2025-09-05 09:27:28 +02:00

153 lines
5.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Name: app/core/qdrant.py
Version: v1.3.1 (2025-09-05)
Kurzbeschreibung:
Qdrant-Client & Collection-Setup für mindnet.
- Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren.
- Edges-Collection nutzt 1D Dummy-Vektor (Workaround für Python-Client).
- **Nicht-destruktiv per Default**: ensure_collections(..., destructive=False).
- **Abwärtskompatibel**: collection_names(prefix) wieder verfügbar.
Aufruf/Verwendung:
from app.core.qdrant import QdrantConfig, get_client, ensure_collections, collection_names
Umgebungsvariablen (optional):
QDRANT_URL | QDRANT_HOST/QDRANT_PORT, QDRANT_API_KEY,
COLLECTION_PREFIX (Default "mindnet"), VECTOR_DIM (Default 384)
Änderungen:
v1.3.1: Helper collection_names(prefix) wiederhergestellt (für reset_qdrant usw.).
v1.3.0: ensure_collections(..., destructive=False) – keine stillen Drops im Dry-Run.
Edges-Collection nur bei explicit destructive=True neu anlegen.
≤v1.2.x: Konnte {prefix}_edges bei fehlender VectorConfig automatisch neu erstellen (riskant).
Bezug/Quelle:
Alte Core-Variante enthielt collection_names(prefix); diverse Scripts nutzen das weiterhin.
"""
from __future__ import annotations
import os
from dataclasses import dataclass
from typing import Optional, Tuple
from qdrant_client import QdrantClient
from qdrant_client.http import models as rest
# -------------------------------
# Konfiguration
# -------------------------------
@dataclass
class QdrantConfig:
    """Connection and naming settings for the mindnet Qdrant setup."""

    # Base URL of the Qdrant endpoint, e.g. "http://127.0.0.1:6333".
    url: str
    # API key passed to the client; None when no key is configured.
    api_key: Optional[str]
    # Prefix used to derive collection names such as "{prefix}_notes".
    prefix: str
    # Vector dimension read from VECTOR_DIM (default 384).
    dim: int

    @classmethod
    def from_env(cls) -> "QdrantConfig":
        """Build a configuration from environment variables.

        Variables read:
            QDRANT_URL: full endpoint URL; takes precedence over host/port.
            QDRANT_HOST / QDRANT_PORT: used to build "http://{host}:{port}"
                when QDRANT_URL is not set (defaults "127.0.0.1" / 6333).
            QDRANT_API_KEY: optional API key.
            COLLECTION_PREFIX: collection name prefix (default "mindnet").
            VECTOR_DIM: vector dimension (default 384).

        A variable that is set to the empty string is treated exactly like
        an unset variable, so the default applies.

        Raises:
            ValueError: if QDRANT_PORT or VECTOR_DIM is set to a non-empty
                value that is not an integer.
        """
        # `os.getenv(...) or default` (instead of a getenv default) makes an
        # empty value fall back to the default as well; otherwise int("")
        # would raise and an empty host/prefix would yield unusable names.
        url = os.getenv("QDRANT_URL")
        if not url:
            host = os.getenv("QDRANT_HOST") or "127.0.0.1"
            port = int(os.getenv("QDRANT_PORT") or "6333")
            url = f"http://{host}:{port}"
        api_key = os.getenv("QDRANT_API_KEY") or None
        prefix = os.getenv("COLLECTION_PREFIX") or "mindnet"
        dim = int(os.getenv("VECTOR_DIM") or "384")
        # `cls` rather than the hard-coded class name so subclasses get
        # instances of their own type.
        return cls(url=url, api_key=api_key, prefix=prefix, dim=dim)
# -------------------------------
# Client
# -------------------------------
def get_client(cfg: QdrantConfig) -> QdrantClient:
    """Return a QdrantClient built from the URL and API key stored in *cfg*."""
    connection = {"url": cfg.url, "api_key": cfg.api_key}
    return QdrantClient(**connection)
# -------------------------------
# Collection-Erzeuger (Hilfsfunktionen)
# -------------------------------
def _create_notes(client: QdrantClient, name: str, dim: int) -> None:
    """Create the notes collection *name* unless it already exists.

    The new collection is configured with vectors of size *dim* and
    cosine distance. An existing collection is left untouched.
    """
    if client.collection_exists(name):
        return
    vector_params = rest.VectorParams(size=dim, distance=rest.Distance.COSINE)
    client.create_collection(collection_name=name, vectors_config=vector_params)
def _create_chunks(client: QdrantClient, name: str, dim: int) -> None:
    """Create the chunks collection *name* unless it already exists.

    The new collection is configured with vectors of size *dim* and
    cosine distance. An existing collection is left untouched.
    """
    already_present = client.collection_exists(name)
    if already_present:
        return
    cosine_vectors = rest.VectorParams(size=dim, distance=rest.Distance.COSINE)
    client.create_collection(
        collection_name=name,
        vectors_config=cosine_vectors,
    )
def _create_edges(client: QdrantClient, name: str) -> None:
    """Create the edges collection *name* unless it already exists.

    The collection is given a one-dimensional placeholder vector with
    dot-product distance. An existing collection is left untouched.
    """
    if client.collection_exists(name):
        return
    # 1-D dummy vector: placeholder configuration for the edges collection.
    dummy_vector = rest.VectorParams(size=1, distance=rest.Distance.DOT)
    client.create_collection(collection_name=name, vectors_config=dummy_vector)
# -------------------------------
# Public API
# -------------------------------
def ensure_collections(client: QdrantClient, prefix: str, dim: int, destructive: bool = False) -> None:
    """
    Ensure that the three collections {prefix}_notes, {prefix}_chunks and
    {prefix}_edges exist.

    - Non-destructive by default: existing collections are left untouched.
    - Only with ``destructive=True`` is an edges collection that has no
      vector configuration deleted and created again.

    Args:
        client: Qdrant client used for all collection operations.
        prefix: Prefix from which the three collection names are derived.
        dim: Vector size used when the notes/chunks collections are created.
        destructive: Allow dropping and recreating an unsuitable edges
            collection. Defaults to False.
    """
    notes = f"{prefix}_notes"
    chunks = f"{prefix}_chunks"
    edges = f"{prefix}_edges"

    _create_notes(client, notes, dim)
    _create_chunks(client, chunks, dim)

    if not client.collection_exists(edges):
        _create_edges(client, edges)
        return

    # Check whether the existing edges collection already has a vector config.
    try:
        info = client.get_collection(edges)
        # QdrantClient.get_collection() returns the collection info object
        # itself; only a raw REST response wraps it in `.result`. Accessing
        # `info.result` unconditionally raised AttributeError, which the
        # handler below swallowed, so this check never detected anything.
        # Accept both shapes.
        payload = getattr(info, "result", None)
        if payload is None:
            payload = info
        params = getattr(getattr(payload, "config", None), "params", None)
        if params is None:
            # Unknown response shape: we cannot tell, so do not touch it.
            has_vectors = True
        else:
            has_vectors = getattr(params, "vectors", None) is not None
    except Exception as exc:
        # Conservative: leave the collection alone to avoid data loss, but
        # report the failure instead of hiding it.
        print(
            f"[ensure_collections] WARN: Konfiguration von '{edges}' nicht lesbar ({exc!r}); "
            f"Collection bleibt unverändert.",
            flush=True,
        )
        has_vectors = True

    if has_vectors:
        return

    if destructive:
        client.delete_collection(edges)
        _create_edges(client, edges)
    else:
        print(
            f"[ensure_collections] WARN: '{edges}' ohne VectorConfig gefunden; "
            f"keine destruktive Änderung (destructive=False).",
            flush=True,
        )
def collection_names(prefix: str) -> Tuple[str, str, str]:
    """
    Backward-compatible helper for scripts.

    Returns the tuple
    (f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges").
    """
    notes_name = f"{prefix}_notes"
    chunks_name = f"{prefix}_chunks"
    edges_name = f"{prefix}_edges"
    return notes_name, chunks_name, edges_name
def wipe_collections(client: QdrantClient, prefix: str) -> None:
    """
    Delete all three collections derived from *prefix*.

    Use only when a clean rebuild is deliberately wanted. Collections that
    do not exist are skipped.
    """
    for collection in collection_names(prefix):
        if not client.collection_exists(collection):
            continue
        client.delete_collection(collection)