All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
125 lines
4.2 KiB
Python
125 lines
4.2 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Name: app/core/qdrant.py
|
|
Version: v1.4.0 (2025-09-09)
|
|
|
|
Kurzbeschreibung:
|
|
Qdrant-Client & Collection-Setup für mindnet.
|
|
- Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren.
|
|
- Edges-Collection nutzt 1D Dummy-Vektor.
|
|
- NEW: ensure_payload_indexes(...) legt sinnvolle Payload-Indizes an.
|
|
|
|
Aufruf:
|
|
from app.core.qdrant import QdrantConfig, get_client, ensure_collections, ensure_payload_indexes
|
|
"""
|
|
from __future__ import annotations
|
|
import os
|
|
from dataclasses import dataclass
|
|
from typing import Optional, Tuple
|
|
|
|
from qdrant_client import QdrantClient
|
|
from qdrant_client.http import models as rest
|
|
|
|
|
|
@dataclass
class QdrantConfig:
    """Connection and naming settings for the Qdrant setup in this module.

    Attributes:
        url: Base URL of the Qdrant server.
        api_key: Optional API key; ``None`` when none is configured.
        prefix: Prefix used to derive the collection names.
        dim: Vector size used for the notes and chunks collections.
    """

    url: str
    api_key: Optional[str]
    prefix: str
    dim: int

    @classmethod
    def from_env(cls) -> "QdrantConfig":
        """Build a configuration from environment variables.

        Variables read (default in parentheses):
            QDRANT_URL          full URL; when unset or empty it is assembled
                                from QDRANT_HOST ("127.0.0.1") and
                                QDRANT_PORT ("6333") as ``http://host:port``
            QDRANT_API_KEY      (None)
            COLLECTION_PREFIX   ("mindnet")
            VECTOR_DIM          ("384")

        A variable that is set to the empty string is treated like an unset
        one, matching the handling of QDRANT_URL and QDRANT_API_KEY.

        Raises:
            ValueError: if QDRANT_PORT or VECTOR_DIM is not an integer.
        """

        def _env(name: str, default: str) -> str:
            # `VAR=` in an env file yields "", which must not override the default.
            return os.getenv(name) or default

        url = os.getenv("QDRANT_URL")
        if not url:
            host = _env("QDRANT_HOST", "127.0.0.1")
            port = int(_env("QDRANT_PORT", "6333"))
            url = f"http://{host}:{port}"

        return cls(
            url=url,
            api_key=os.getenv("QDRANT_API_KEY") or None,
            prefix=_env("COLLECTION_PREFIX", "mindnet"),
            dim=int(_env("VECTOR_DIM", "384")),
        )
|
|
|
|
|
|
def get_client(cfg: QdrantConfig) -> QdrantClient:
    """Return a Qdrant client built from the URL and API key held in *cfg*."""
    connection = {"url": cfg.url, "api_key": cfg.api_key}
    return QdrantClient(**connection)
|
|
|
|
|
|
def _create_notes(client: QdrantClient, name: str, dim: int) -> None:
    """Create the notes collection *name* unless it already exists.

    The collection is configured with vectors of size *dim* compared by
    cosine distance.
    """
    if client.collection_exists(name):
        return
    vector_params = rest.VectorParams(size=dim, distance=rest.Distance.COSINE)
    client.create_collection(collection_name=name, vectors_config=vector_params)
|
|
|
|
def _create_chunks(client: QdrantClient, name: str, dim: int) -> None:
    """Create the chunks collection *name* unless it already exists.

    The collection is configured with vectors of size *dim* compared by
    cosine distance.
    """
    already_there = client.collection_exists(name)
    if already_there:
        return
    client.create_collection(
        collection_name=name,
        vectors_config=rest.VectorParams(
            size=dim,
            distance=rest.Distance.COSINE,
        ),
    )
|
|
|
|
def _create_edges(client: QdrantClient, name: str) -> None:
    """Create the edges collection *name* unless it already exists.

    The collection is configured with a one-dimensional dummy vector
    (dot-product distance).
    """
    if client.collection_exists(name):
        return
    # 1-D dummy vector
    dummy_vector = rest.VectorParams(size=1, distance=rest.Distance.DOT)
    client.create_collection(collection_name=name, vectors_config=dummy_vector)
|
|
|
|
|
|
def ensure_collections(client: QdrantClient, prefix: str, dim: int, destructive: bool = False) -> None:
    """Make sure the notes, chunks and edges collections for *prefix* exist.

    Notes and chunks are created (if missing) with vectors of size *dim*.
    The edges collection is created if missing. If it already exists but
    reports no vector configuration, it is deleted and recreated when
    *destructive* is true; otherwise only a warning is printed.

    Args:
        client: Qdrant client used for all operations.
        prefix: Prefix of the three collection names.
        dim: Vector size for the notes and chunks collections.
        destructive: Allow dropping an edges collection that lacks a vector
            configuration.
    """
    notes = f"{prefix}_notes"
    chunks = f"{prefix}_chunks"
    edges = f"{prefix}_edges"

    _create_notes(client, notes, dim)
    _create_chunks(client, chunks, dim)

    if not client.collection_exists(edges):
        _create_edges(client, edges)
        return

    try:
        info = client.get_collection(edges)
        # QdrantClient.get_collection returns the collection info directly;
        # only the raw HTTP response wraps it in `.result`. Reading
        # `info.result` unconditionally raised AttributeError, which the
        # handler below swallowed, so the check never ran. Accept both shapes.
        root = getattr(info, "result", None)
        if root is None:
            root = info
        params = getattr(getattr(root, "config", None), "params", None)
        if params is None:
            # Unrecognised response shape: do not risk a destructive repair.
            has_vectors = True
        else:
            has_vectors = getattr(params, "vectors", None) is not None
    except Exception as exc:
        # Best effort: if the collection cannot be inspected, leave it alone,
        # but say so instead of failing silently.
        print(
            f"[ensure_collections] WARN: could not inspect '{edges}' ({exc!r}); leaving it unchanged.",
            flush=True,
        )
        has_vectors = True

    if has_vectors:
        return

    if destructive:
        client.delete_collection(edges)
        _create_edges(client, edges)
    else:
        print(f"[ensure_collections] WARN: '{edges}' ohne VectorConfig; destructive=False.", flush=True)
|
|
|
|
|
|
def collection_names(prefix: str) -> Tuple[str, str, str]:
    """Return the (notes, chunks, edges) collection names for *prefix*."""
    notes = f"{prefix}_notes"
    chunks = f"{prefix}_chunks"
    edges = f"{prefix}_edges"
    return notes, chunks, edges
|
|
|
|
|
|
# -------------------------------
|
|
# NEW: Payload-Indexing
|
|
# -------------------------------
|
|
|
|
def _safe_create_index(client: QdrantClient, col: str, field: str, schema: rest.PayloadSchemaType) -> None:
    """Create a payload index on *field* of collection *col*, never raising.

    Index creation is best effort: any failure (index already present,
    unsupported schema, unreachable server, ...) is reported as a warning on
    stdout and otherwise ignored, so that setup can continue.

    Args:
        client: Qdrant client used to create the index.
        col: Collection name.
        field: Payload field to index.
        schema: Payload schema type of the index.
    """
    try:
        client.create_payload_index(
            collection_name=col,
            field_name=field,
            field_schema=schema,
        )
    except Exception as exc:
        # Still non-fatal, but no longer silent: a swallowed connection or
        # auth error here would otherwise leave the collections unindexed
        # without any trace.
        print(
            f"[ensure_payload_indexes] WARN: index '{field}' on '{col}' not created: {exc!r}",
            flush=True,
        )
|
|
|
|
def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None:
    """Request the payload indexes for the three collections of *prefix*.

    Indexed fields:
        notes:  note_id (keyword)
        chunks: note_id (keyword), chunk_index (integer)
        edges:  kind, scope, source_id, target_id, note_id (all keyword)
    """
    notes, chunks, edges = collection_names(prefix)
    keyword = rest.PayloadSchemaType.KEYWORD
    integer = rest.PayloadSchemaType.INTEGER

    plan = [
        (notes, "note_id", keyword),
        (chunks, "note_id", keyword),
        (chunks, "chunk_index", integer),
    ]
    plan.extend(
        (edges, edge_field, keyword)
        for edge_field in ("kind", "scope", "source_id", "target_id", "note_id")
    )

    for collection, field_name, schema in plan:
        _safe_create_index(client, collection, field_name, schema)
|