""" FILE: app/core/database/qdrant.py DESCRIPTION: Qdrant-Client Factory und Schema-Management. Erstellt Collections und Payload-Indizes. MODULARISIERUNG: Verschoben in das database-Paket für WP-14. VERSION: 2.2.2 (WP-Fix: Index für target_section) STATUS: Active DEPENDENCIES: qdrant_client, dataclasses, os """ from __future__ import annotations import os import logging from dataclasses import dataclass from typing import Optional, Tuple, Dict, List from qdrant_client import QdrantClient from qdrant_client.http import models as rest logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Konfiguration # --------------------------------------------------------------------------- @dataclass class QdrantConfig: """Konfigurationsobjekt für den Qdrant-Verbindungsaufbau.""" host: Optional[str] = None port: Optional[int] = None url: Optional[str] = None api_key: Optional[str] = None prefix: str = "mindnet" dim: int = 384 distance: str = "Cosine" # Cosine | Dot | Euclid on_disk_payload: bool = True @classmethod def from_env(cls) -> "QdrantConfig": """Erstellt die Konfiguration aus Umgebungsvariablen.""" # Entweder URL ODER Host/Port, API-Key optional url = os.getenv("QDRANT_URL") or None host = os.getenv("QDRANT_HOST") or None port = os.getenv("QDRANT_PORT") port = int(port) if port else None api_key = os.getenv("QDRANT_API_KEY") or None prefix = os.getenv("COLLECTION_PREFIX") or "mindnet" dim = int(os.getenv("VECTOR_DIM") or 384) distance = os.getenv("DISTANCE", "Cosine") on_disk_payload = (os.getenv("ON_DISK_PAYLOAD", "true").lower() == "true") return cls( host=host, port=port, url=url, api_key=api_key, prefix=prefix, dim=dim, distance=distance, on_disk_payload=on_disk_payload ) def get_client(cfg: QdrantConfig) -> QdrantClient: """Initialisiert den Qdrant-Client basierend auf der Konfiguration.""" # QdrantClient akzeptiert entweder url=... oder host/port if cfg.url: return QdrantClient(url=cfg.url, api_key=cfg.api_key, timeout=60.0) return QdrantClient(host=cfg.host or "127.0.0.1", port=cfg.port or 6333, api_key=cfg.api_key, timeout=60.0) # --------------------------------------------------------------------------- # Collections # --------------------------------------------------------------------------- def collection_names(prefix: str) -> Tuple[str, str, str]: """Gibt die standardisierten Collection-Namen zurück.""" return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges" def _vector_params(dim: int, distance: str) -> rest.VectorParams: """Erstellt Vektor-Parameter für das Collection-Schema.""" # Distance: "Cosine" | "Dot" | "Euclid" dist = getattr(rest.Distance, distance.capitalize(), rest.Distance.COSINE) return rest.VectorParams(size=dim, distance=dist) def ensure_collections(client: QdrantClient, prefix: str, dim: int) -> None: """Legt notes, chunks und edges Collections an, falls nicht vorhanden.""" notes, chunks, edges = collection_names(prefix) # notes if not client.collection_exists(notes): client.create_collection( collection_name=notes, vectors_config=_vector_params(dim, os.getenv("DISTANCE", "Cosine")), on_disk_payload=True, ) # chunks if not client.collection_exists(chunks): client.create_collection( collection_name=chunks, vectors_config=_vector_params(dim, os.getenv("DISTANCE", "Cosine")), on_disk_payload=True, ) # edges (Dummy-Vektor, da primär via Payload gefiltert wird) if not client.collection_exists(edges): client.create_collection( collection_name=edges, vectors_config=_vector_params(1, "Dot"), on_disk_payload=True, ) # --------------------------------------------------------------------------- # Payload-Indizes # --------------------------------------------------------------------------- def _ensure_index(client: QdrantClient, collection: str, field: str, schema: rest.PayloadSchemaType) -> None: """Idempotentes Anlegen eines Payload-Indexes für ein spezifisches Feld.""" try: client.create_payload_index(collection_name=collection, field_name=field, field_schema=schema, wait=True) except Exception as e: # Fehler ignorieren, falls Index bereits existiert logger.debug(f"Index check for {field} in {collection}: {e}") def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None: """ Stellt sicher, dass alle benötigten Payload-Indizes für die Suche existieren. - notes: note_id, type, title, updated, tags - chunks: note_id, chunk_id, index, type, tags - edges: note_id, kind, scope, source_id, target_id, chunk_id, target_section """ notes, chunks, edges = collection_names(prefix) # NOTES for field, schema in [ ("note_id", rest.PayloadSchemaType.KEYWORD), ("type", rest.PayloadSchemaType.KEYWORD), ("title", rest.PayloadSchemaType.TEXT), ("updated", rest.PayloadSchemaType.INTEGER), ("tags", rest.PayloadSchemaType.KEYWORD), ]: _ensure_index(client, notes, field, schema) # CHUNKS for field, schema in [ ("note_id", rest.PayloadSchemaType.KEYWORD), ("chunk_id", rest.PayloadSchemaType.KEYWORD), ("index", rest.PayloadSchemaType.INTEGER), ("type", rest.PayloadSchemaType.KEYWORD), ("tags", rest.PayloadSchemaType.KEYWORD), ]: _ensure_index(client, chunks, field, schema) # EDGES for field, schema in [ ("note_id", rest.PayloadSchemaType.KEYWORD), ("kind", rest.PayloadSchemaType.KEYWORD), ("scope", rest.PayloadSchemaType.KEYWORD), ("source_id", rest.PayloadSchemaType.KEYWORD), ("target_id", rest.PayloadSchemaType.KEYWORD), ("chunk_id", rest.PayloadSchemaType.KEYWORD), # NEU: Index für Section-Links (WP-15b) ("target_section", rest.PayloadSchemaType.KEYWORD), ]: _ensure_index(client, edges, field, schema) __all__ = [ "QdrantConfig", "get_client", "ensure_collections", "ensure_payload_indexes", "collection_names", ]