"""Qdrant helpers: client construction and idempotent collection setup.

Provenance: added as ``app/core/qdrant.py`` in commit afc969dc91a5
("app/core/qdrant.py hinzugefügt", 2025-09-03).
"""
from __future__ import annotations

import logging
import os
from dataclasses import dataclass
from typing import Tuple

from qdrant_client import QdrantClient
from qdrant_client.http import models as rest

logger = logging.getLogger(__name__)

# Default vector size; read once at import time from the VECTOR_DIM
# environment variable, falling back to 384 when it is not set.
DEFAULT_DIM = int(os.getenv("VECTOR_DIM", "384"))


@dataclass
class QdrantConfig:
    """Connection and naming settings used by the helpers in this module."""

    url: str  # forwarded to QdrantClient(url=...)
    api_key: str | None = None  # empty string is treated like None
    prefix: str = "mindnet"  # collection names are "{prefix}_notes" etc.
    dim: int = DEFAULT_DIM  # vector size of the notes and chunks collections


def _collection_names(prefix: str) -> Tuple[str, str, str]:
    """Return the (notes, chunks, edges) collection names for *prefix*."""
    notes = f"{prefix}_notes"
    chunks = f"{prefix}_chunks"
    edges = f"{prefix}_edges"
    return notes, chunks, edges


def get_client(cfg: QdrantConfig) -> QdrantClient:
    """Build a REST (non-gRPC) ``QdrantClient`` from *cfg*."""
    return QdrantClient(url=cfg.url, api_key=cfg.api_key or None, prefer_grpc=False)


def ensure_collections(cfg: QdrantConfig) -> Tuple[str, str, str]:
    """Create the three collections and their payload indexes if missing.

    Idempotent: creates ``{prefix}_notes``, ``{prefix}_chunks`` and
    ``{prefix}_edges`` only when they do not exist yet, then requests a
    keyword payload index for each field listed below.

    Args:
        cfg: Connection settings, collection prefix and vector size.

    Returns:
        The ``(notes, chunks, edges)`` collection names.

    Raises:
        Exception: Whatever the client raises while checking for or creating
            a collection. Failures while creating a payload index are logged
            and do not abort the call.
    """
    client = get_client(cfg)
    notes, chunks, edges = _collection_names(cfg.prefix)

    # Vector configurations
    note_vec = rest.VectorParams(size=cfg.dim, distance=rest.Distance.COSINE)
    chunk_vec = rest.VectorParams(size=cfg.dim, distance=rest.Distance.COSINE)
    edge_vec = rest.VectorParams(size=1, distance=rest.Distance.COSINE)  # dummy vector

    def _create_if_missing(name: str, vparam: rest.VectorParams) -> None:
        # Decide on existence alone. An existing collection whose status is
        # not GREEN must not be re-created: create_collection fails for a
        # name that is already taken.
        if client.collection_exists(name):
            return
        client.create_collection(
            collection_name=name,
            # rest.VectorsConfig is a typing.Union alias and cannot be
            # instantiated; a single unnamed vector is configured by passing
            # the VectorParams itself.
            vectors_config=vparam,
            optimizers_config=rest.OptimizersConfigDiff(indexing_threshold=20000),
            on_disk_payload=True,
        )

    _create_if_missing(notes, note_vec)
    _create_if_missing(chunks, chunk_vec)
    _create_if_missing(edges, edge_vec)

    # Payload indexes
    def _ensure_index(name: str, field: str, kind: rest.PayloadSchemaType) -> None:
        try:
            client.create_payload_index(
                collection_name=name,
                field_name=field,
                # rest.PayloadSchemaParams is likewise a Union alias; the
                # PayloadSchemaType enum member is accepted directly.
                field_schema=kind,
            )
        except Exception:
            # Index creation stays best-effort, but the failure is reported
            # instead of being discarded.
            logger.warning(
                "Could not create payload index %r on collection %r",
                field,
                name,
                exc_info=True,
            )

    for f in ("note_id", "type", "status", "project", "area", "path", "tags"):
        _ensure_index(notes, f, rest.PayloadSchemaType.KEYWORD)

    # NOTE(review): "chunk_index" is indexed as KEYWORD like the other fields;
    # if it is stored as an integer an INTEGER index may be intended - confirm.
    for f in ("note_id", "type", "tags", "section_title", "section_path", "path", "chunk_index"):
        _ensure_index(chunks, f, rest.PayloadSchemaType.KEYWORD)

    for f in ("src_id", "dst_id", "edge_type", "scope"):
        _ensure_index(edges, f, rest.PayloadSchemaType.KEYWORD)

    return notes, chunks, edges