mindnet/app/core/qdrant.py
Lars da71d0b4fe
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
app/core/qdrant.py aktualisiert
2025-09-03 08:12:31 +02:00

86 lines
2.9 KiB
Python

from __future__ import annotations
from dataclasses import dataclass
from typing import Tuple
import os
from qdrant_client import QdrantClient
from qdrant_client.http import models as rest
# Default embedding size; can be overridden through the VECTOR_DIM environment variable.
DEFAULT_DIM = int(os.environ.get("VECTOR_DIM", "384"))
@dataclass
class QdrantConfig:
    """Connection and naming settings for the Qdrant collections of this module."""

    # URL of the Qdrant server; handed to QdrantClient(url=...).
    url: str
    # Optional API key; None or an empty string results in no key being sent.
    api_key: str | None = None
    # Prefix used to build the "<prefix>_notes/_chunks/_edges" collection names.
    prefix: str = "mindnet"
    # Vector size used for the notes and chunks collections.
    dim: int = DEFAULT_DIM
def _collection_names(prefix: str) -> Tuple[str, str, str]:
"""
Liefert die standardisierten Collection-Namen für Notes, Chunks und Edges.
"""
return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges"
def get_client(cfg: QdrantConfig) -> QdrantClient:
    """Create a QdrantClient from the given configuration.

    Args:
        cfg: Provides the server URL and the optional API key.

    Returns:
        A client for ``cfg.url`` constructed with ``prefer_grpc=False``.
    """
    # An empty API key is normalized to None before it is passed on.
    api_key = cfg.api_key or None
    return QdrantClient(url=cfg.url, api_key=api_key, prefer_grpc=False)
def ensure_collections(cfg: QdrantConfig) -> Tuple[str, str, str]:
    """Ensure the notes, chunks and edges collections and their payload indexes exist.

    Creates ``<prefix>_notes``, ``<prefix>_chunks`` and ``<prefix>_edges`` when
    they are missing and then requests keyword payload indexes on a fixed set
    of fields per collection. Intended to be idempotent: an existing collection
    is left untouched regardless of its current status.

    Args:
        cfg: Connection settings, collection prefix and vector dimension.

    Returns:
        The ``(notes, chunks, edges)`` collection names.

    Raises:
        Exception: Whatever ``create_collection`` raises when a missing
            collection cannot be created. Payload-index failures are logged
            and do not propagate.
    """
    import logging

    log = logging.getLogger(__name__)
    client = get_client(cfg)
    notes, chunks, edges = _collection_names(cfg.prefix)

    # Vector configuration
    note_vec = rest.VectorParams(size=cfg.dim, distance=rest.Distance.COSINE)
    chunk_vec = rest.VectorParams(size=cfg.dim, distance=rest.Distance.COSINE)
    # Dummy 1-dimensional vector for the edges collection.
    edge_vec = rest.VectorParams(size=1, distance=rest.Distance.COSINE)

    def _create_if_missing(name: str, vparam: rest.VectorParams) -> None:
        """Create collection *name* unless a lookup shows that it already exists."""
        try:
            client.get_collection(name)
        except Exception:
            # A failed lookup is treated as "collection does not exist"; a real
            # connectivity problem will surface from create_collection below.
            log.debug("Collection %s not found, creating it", name, exc_info=True)
        else:
            # Existence is enough. Requiring a GREEN status would try to
            # re-create a collection that is merely busy, which fails.
            return
        client.create_collection(
            collection_name=name,
            # The REST ``VectorsConfig`` is a Union alias and cannot be
            # instantiated; the VectorParams object is passed directly.
            vectors_config=vparam,
            optimizers_config=rest.OptimizersConfigDiff(indexing_threshold=20000),
            on_disk_payload=True,
        )

    def _ensure_index(name: str, field: str, kind: rest.PayloadSchemaType) -> None:
        """Best-effort creation of a payload index on *field* of collection *name*."""
        try:
            client.create_payload_index(
                collection_name=name,
                field_name=field,
                # The schema type is passed directly; ``PayloadSchemaParams``
                # is a Union alias and cannot be instantiated.
                field_schema=kind,
            )
        except Exception:
            # Index creation stays best-effort, but failures are no longer silent.
            log.warning(
                "Could not create payload index on %s.%s", name, field, exc_info=True
            )

    for name, vparam in ((notes, note_vec), (chunks, chunk_vec), (edges, edge_vec)):
        _create_if_missing(name, vparam)

    # Payload indexes per collection.
    # NOTE(review): "chunk_index" is indexed as KEYWORD like every other field;
    # if it is stored as an integer an INTEGER index may be intended - confirm.
    indexed_fields = (
        (notes, ("note_id", "type", "status", "project", "area", "path", "tags")),
        (
            chunks,
            (
                "note_id",
                "type",
                "tags",
                "section_title",
                "section_path",
                "path",
                "chunk_index",
            ),
        ),
        (edges, ("src_id", "dst_id", "edge_type", "scope")),
    )
    for name, fields in indexed_fields:
        for field in fields:
            _ensure_index(name, field, rest.PayloadSchemaType.KEYWORD)

    return notes, chunks, edges