'PY' from __future__ import annotations from dataclasses import dataclass from typing import Tuple import os from qdrant_client import QdrantClient from qdrant_client.http import models as rest DEFAULT_DIM = int(os.getenv("VECTOR_DIM", "384")) @dataclass class QdrantConfig: url: str api_key: str | None = None prefix: str = "mindnet" dim: int = DEFAULT_DIM def _collection_names(prefix: str) -> Tuple[str, str, str]: notes = f"{prefix}_notes" chunks = f"{prefix}_chunks" edges = f"{prefix}_edges" return notes, chunks, edges def get_client(cfg: QdrantConfig) -> QdrantClient: return QdrantClient(url=cfg.url, api_key=cfg.api_key or None, prefer_grpc=False) def ensure_collections(cfg: QdrantConfig) -> Tuple[str, str, str]: """ Idempotent: legt {prefix}_{notes,chunks,edges} an (falls fehlend) und erzeugt sinnvolle Payload-Indizes. """ client = get_client(cfg) notes, chunks, edges = _collection_names(cfg.prefix) # Vektorkonfigs note_vec = rest.VectorParams(size=cfg.dim, distance=rest.Distance.COSINE) chunk_vec = rest.VectorParams(size=cfg.dim, distance=rest.Distance.COSINE) edge_vec = rest.VectorParams(size=1, distance=rest.Distance.COSINE) # Dummy-Vektor def _create_if_missing(name: str, vparam: rest.VectorParams): try: info = client.get_collection(name) if info and info.status == rest.CollectionStatus.GREEN: return except Exception: pass client.create_collection( collection_name=name, vectors_config=rest.VectorsConfig(params=vparam), optimizers_config=rest.OptimizersConfigDiff(indexing_threshold=20000), on_disk_payload=True, ) _create_if_missing(notes, note_vec) _create_if_missing(chunks, chunk_vec) _create_if_missing(edges, edge_vec) # Payload-Indizes def _ensure_index(name: str, field: str, kind: rest.PayloadSchemaType): try: client.create_payload_index( collection_name=name, field_name=field, field_schema=rest.PayloadSchemaParams(schema=kind), ) except Exception: pass # existiert schon for f in ("note_id", "type", "status", "project", "area", "path", "tags"): _ensure_index(notes, f, rest.PayloadSchemaType.KEYWORD) for f in ("note_id", "type", "tags", "section_title", "section_path", "path", "chunk_index"): _ensure_index(chunks, f, rest.PayloadSchemaType.KEYWORD) for f in ("src_id", "dst_id", "edge_type", "scope"): _ensure_index(edges, f, rest.PayloadSchemaType.KEYWORD) return notes, chunks, edges