171 lines
6.4 KiB
Python
171 lines
6.4 KiB
Python
"""
|
|
FILE: app/core/database/qdrant.py
|
|
DESCRIPTION: Qdrant-Client Factory und Schema-Management.
|
|
Erstellt Collections und Payload-Indizes.
|
|
MODULARISIERUNG: Verschoben in das database-Paket für WP-14.
|
|
VERSION: 2.2.2 (WP-Fix: Index für target_section)
|
|
STATUS: Active
|
|
DEPENDENCIES: qdrant_client, dataclasses, logging, os, typing
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import logging
|
|
from dataclasses import dataclass
|
|
from typing import Optional, Tuple, Dict, List
|
|
|
|
from qdrant_client import QdrantClient
|
|
from qdrant_client.http import models as rest
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Konfiguration
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@dataclass
class QdrantConfig:
    """Configuration object for establishing the Qdrant connection.

    Either ``url`` or ``host``/``port`` identifies the server; ``api_key`` is
    optional.
    """

    host: Optional[str] = None
    port: Optional[int] = None
    url: Optional[str] = None
    api_key: Optional[str] = None
    prefix: str = "mindnet"  # collection name prefix (COLLECTION_PREFIX)
    dim: int = 384  # vector dimension (VECTOR_DIM)
    distance: str = "Cosine"  # Cosine | Dot | Euclid
    on_disk_payload: bool = True

    @classmethod
    def from_env(cls) -> "QdrantConfig":
        """Build the configuration from environment variables.

        Variables read: QDRANT_URL, QDRANT_HOST, QDRANT_PORT, QDRANT_API_KEY,
        COLLECTION_PREFIX, VECTOR_DIM, DISTANCE and ON_DISK_PAYLOAD. Unset or
        empty variables fall back to the dataclass defaults, except
        ON_DISK_PAYLOAD, where an empty value means ``False``.

        Returns:
            A populated ``QdrantConfig``.

        Raises:
            ValueError: If QDRANT_PORT or VECTOR_DIM is set to a value that is
                not an integer.
        """

        def _int_from_env(name: str, default: Optional[int]) -> Optional[int]:
            # Parse an integer variable; name the variable in the error so a
            # misconfiguration is easy to locate.
            raw = os.getenv(name)
            if not raw:
                return default
            try:
                return int(raw)
            except ValueError as err:
                raise ValueError(
                    f"{name} must be an integer, got {raw!r}"
                ) from err

        # Accept the common truthy spellings instead of only the literal
        # "true"; anything else (including an empty value) disables the flag.
        flag = os.getenv("ON_DISK_PAYLOAD", "true").strip().lower()

        # Either URL or host/port; the API key is optional.
        return cls(
            host=os.getenv("QDRANT_HOST") or None,
            port=_int_from_env("QDRANT_PORT", None),
            url=os.getenv("QDRANT_URL") or None,
            api_key=os.getenv("QDRANT_API_KEY") or None,
            prefix=os.getenv("COLLECTION_PREFIX") or "mindnet",
            dim=_int_from_env("VECTOR_DIM", 384),
            # `or` (not a getenv default) so an empty DISTANCE also falls
            # back, consistent with the other settings.
            distance=os.getenv("DISTANCE") or "Cosine",
            on_disk_payload=flag in ("1", "true", "yes", "on"),
        )
|
|
|
|
|
|
def get_client(cfg: QdrantConfig) -> QdrantClient:
    """Create a Qdrant client from the given configuration.

    Args:
        cfg: Connection settings. When ``cfg.url`` is set it is used;
            otherwise the client connects via ``cfg.host`` / ``cfg.port``,
            defaulting to ``127.0.0.1`` and ``6333``.

    Returns:
        A ``QdrantClient`` constructed with a 60 second timeout.
    """
    # Arguments common to both connection styles.
    shared = {"api_key": cfg.api_key, "timeout": 60.0}

    # QdrantClient is given either url=... or host/port, never both.
    if not cfg.url:
        return QdrantClient(
            host=cfg.host or "127.0.0.1",
            port=cfg.port or 6333,
            **shared,
        )
    return QdrantClient(url=cfg.url, **shared)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Collections
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def collection_names(prefix: str) -> Tuple[str, str, str]:
    """Return the standard collection names for *prefix*.

    Args:
        prefix: Text placed before each ``_<suffix>``.

    Returns:
        A ``(notes, chunks, edges)`` tuple of collection names.
    """
    notes, chunks, edges = (
        f"{prefix}_{suffix}" for suffix in ("notes", "chunks", "edges")
    )
    return notes, chunks, edges
|
|
|
|
|
|
def _vector_params(dim: int, distance: str) -> rest.VectorParams:
    """Build the vector parameters for a collection schema.

    Args:
        dim: Vector size.
        distance: Metric name, matched case-insensitively against the member
            names of ``rest.Distance`` (e.g. "Cosine", "Dot", "Euclid").

    Returns:
        ``rest.VectorParams`` with the requested size and metric. An unknown
        or empty metric name falls back to ``rest.Distance.COSINE``.
    """
    # The enum's member names are upper-case (COSINE, DOT, EUCLID). Looking up
    # the capitalized name ("Dot") via getattr never matched, so every
    # collection silently ended up with the cosine fallback.
    key = (distance or "").strip().upper()
    dist = rest.Distance.__members__.get(key, rest.Distance.COSINE)
    return rest.VectorParams(size=dim, distance=dist)
|
|
|
|
|
|
def ensure_collections(
    client: QdrantClient,
    prefix: str,
    dim: int,
    *,
    distance: Optional[str] = None,
    on_disk_payload: bool = True,
) -> None:
    """Create the notes, chunks and edges collections if they are missing.

    Args:
        client: Qdrant client used to check for and create the collections.
        prefix: Prefix passed to ``collection_names``.
        dim: Vector size for the notes and chunks collections.
        distance: Metric name for notes and chunks. When ``None`` (default)
            the ``DISTANCE`` environment variable is used, falling back to
            "Cosine" when it is unset.
        on_disk_payload: Forwarded to ``create_collection`` for all three
            collections; defaults to ``True``.
    """
    notes, chunks, edges = collection_names(prefix)
    if distance is None:
        distance = os.getenv("DISTANCE", "Cosine")

    # (collection, vector size, metric). The edges collection only carries a
    # one-dimensional dummy vector because it is primarily filtered by payload.
    specs = (
        (notes, dim, distance),
        (chunks, dim, distance),
        (edges, 1, "Dot"),
    )
    for name, size, metric in specs:
        if client.collection_exists(name):
            continue
        client.create_collection(
            collection_name=name,
            vectors_config=_vector_params(size, metric),
            on_disk_payload=on_disk_payload,
        )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Payload-Indizes
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _ensure_index(client: QdrantClient, collection: str, field: str, schema: rest.PayloadSchemaType) -> None:
    """Create a payload index for one field, tolerating failures.

    The call is best-effort: any exception raised by the client is logged and
    swallowed so that a repeated run does not abort.

    Args:
        client: Qdrant client used to create the index.
        collection: Name of the collection holding the field.
        field: Payload field to index.
        schema: Payload schema type of the index.
    """
    try:
        client.create_payload_index(
            collection_name=collection,
            field_name=field,
            field_schema=schema,
            wait=True,
        )
    except Exception as exc:  # deliberate best-effort boundary
        # An existing index is expected and only worth a debug line; anything
        # else (connectivity, auth, missing collection) must stay visible.
        # NOTE(review): the "already exists" wording is assumed - confirm
        # against actual server responses.
        if "already exists" in str(exc).lower():
            logger.debug("Index check for %s in %s: %s", field, collection, exc)
        else:
            logger.warning("Index check for %s in %s: %s", field, collection, exc)
|
|
|
|
|
|
def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None:
    """Make sure every payload index needed for searching exists.

    Indexed fields per collection:

    - notes: note_id, type, title, updated, tags
    - chunks: note_id, chunk_id, index, type, tags
    - edges: note_id, kind, scope, source_id, target_id, chunk_id,
      target_section

    Args:
        client: Qdrant client handed to ``_ensure_index``.
        prefix: Prefix passed to ``collection_names``.
    """
    notes_col, chunks_col, edges_col = collection_names(prefix)

    keyword = rest.PayloadSchemaType.KEYWORD
    integer = rest.PayloadSchemaType.INTEGER
    text = rest.PayloadSchemaType.TEXT

    # One entry per collection, processed in order: notes, chunks, edges.
    plan = (
        (
            notes_col,
            (
                ("note_id", keyword),
                ("type", keyword),
                ("title", text),
                ("updated", integer),
                ("tags", keyword),
            ),
        ),
        (
            chunks_col,
            (
                ("note_id", keyword),
                ("chunk_id", keyword),
                ("index", integer),
                ("type", keyword),
                ("tags", keyword),
            ),
        ),
        (
            edges_col,
            (
                ("note_id", keyword),
                ("kind", keyword),
                ("scope", keyword),
                ("source_id", keyword),
                ("target_id", keyword),
                ("chunk_id", keyword),
                # Index for section links (WP-15b).
                ("target_section", keyword),
            ),
        ),
    )

    for collection, field_specs in plan:
        for field_name, schema in field_specs:
            _ensure_index(client, collection, field_name, schema)
|
|
|
|
|
|
# Names exported by `from <module> import *`.
__all__ = [
    "QdrantConfig", "get_client",
    "ensure_collections", "ensure_payload_indexes",
    "collection_names",
]