app/core/qdrant.py aktualisiert
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s

This commit is contained in:
Lars 2025-09-04 08:37:10 +02:00
parent 3dcc9274fc
commit 98f719e42f

View File

@ -1,85 +1,120 @@
# app/core/qdrant.py
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass
from typing import Tuple
import os import os
from dataclasses import dataclass
from typing import Optional
from qdrant_client import QdrantClient from qdrant_client import QdrantClient
from qdrant_client.http import models as rest from qdrant_client.http import models as rest
DEFAULT_DIM = int(os.getenv("VECTOR_DIM", "384"))
# -------------------------------
# Konfiguration
# -------------------------------
@dataclass @dataclass
class QdrantConfig: class QdrantConfig:
url: str url: str
api_key: str | None = None api_key: Optional[str]
prefix: str = "mindnet" prefix: str
dim: int = DEFAULT_DIM dim: int
def _collection_names(prefix: str) -> Tuple[str, str, str]: @staticmethod
""" def from_env() -> "QdrantConfig":
Liefert die standardisierten Collection-Namen für Notes, Chunks und Edges. # URL (bevorzugt) oder Host/Port
""" url = os.getenv("QDRANT_URL")
return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges" if not url:
host = os.getenv("QDRANT_HOST", "127.0.0.1")
port = int(os.getenv("QDRANT_PORT", "6333"))
url = f"http://{host}:{port}"
api_key = os.getenv("QDRANT_API_KEY") or None
# Collection-Prefix und Vektor-Dimension
prefix = os.getenv("COLLECTION_PREFIX", "mindnet")
dim = int(os.getenv("VECTOR_DIM", "384")) # MiniLM-384 by default
return QdrantConfig(url=url, api_key=api_key, prefix=prefix, dim=dim)
# -------------------------------
# Client / Setup
# -------------------------------
def get_client(cfg: QdrantConfig) -> QdrantClient: def get_client(cfg: QdrantConfig) -> QdrantClient:
""" """
Erstellt einen QdrantClient basierend auf der Config. Erstellt einen QdrantClient basierend auf der Config.
""" """
return QdrantClient(url=cfg.url, api_key=cfg.api_key or None, prefer_grpc=False) return QdrantClient(url=cfg.url, api_key=cfg.api_key)
def ensure_collections(cfg: QdrantConfig) -> Tuple[str, str, str]:
def ensure_collections(client: QdrantClient, prefix: str, dim: int) -> None:
""" """
Idempotent: legt {prefix}_notes, {prefix}_chunks, {prefix}_edges an, Stellt sicher, dass die drei Collections existieren:
falls sie fehlen, und erzeugt sinnvolle Payload-Indizes. - {prefix}_notes : Vektor-Dim = dim (COSINE)
- {prefix}_chunks : Vektor-Dim = dim (COSINE)
- {prefix}_edges : Vektor-Dim = 1 (DOT) <-- Dummy-Vektor, damit der Python-Client kein 'vector' zwingt
Falls {prefix}_edges bereits vektorlos existiert, wird sie gelöscht und mit 1D neu erstellt.
""" """
client = get_client(cfg) notes = f"{prefix}_notes"
notes, chunks, edges = _collection_names(cfg.prefix) chunks = f"{prefix}_chunks"
edges = f"{prefix}_edges"
# Vektorkonfiguration # Notes
note_vec = rest.VectorParams(size=cfg.dim, distance=rest.Distance.COSINE) if not client.collection_exists(notes):
chunk_vec = rest.VectorParams(size=cfg.dim, distance=rest.Distance.COSINE)
edge_vec = rest.VectorParams(size=1, distance=rest.Distance.COSINE) # Dummy für edges
def _create_if_missing(name: str, vparam: rest.VectorParams):
try:
info = client.get_collection(name)
if info and info.status == rest.CollectionStatus.GREEN:
return
except Exception:
pass
client.create_collection( client.create_collection(
collection_name=name, collection_name=notes,
vectors_config=rest.VectorsConfig(params=vparam), vectors_config=rest.VectorParams(size=dim, distance=rest.Distance.COSINE),
optimizers_config=rest.OptimizersConfigDiff(indexing_threshold=20000),
on_disk_payload=True,
) )
_create_if_missing(notes, note_vec) # Chunks
_create_if_missing(chunks, chunk_vec) if not client.collection_exists(chunks):
_create_if_missing(edges, edge_vec) client.create_collection(
collection_name=chunks,
vectors_config=rest.VectorParams(size=dim, distance=rest.Distance.COSINE),
)
# Payload-Indizes # Edges: 1D Dummy-Vektor (Workaround)
def _ensure_index(name: str, field: str, kind: rest.PayloadSchemaType): recreate_edges = False
if client.collection_exists(edges):
try: try:
client.create_payload_index( info = client.get_collection(edges)
collection_name=name, # Prüfen, ob Vektor-Konfig existiert
field_name=field, vectors_cfg = getattr(getattr(info.result, "config", None), "params", None)
field_schema=rest.PayloadSchemaParams(schema=kind), # Neuere Clients: info.result.config.params.vectors kann VectorParams oder dict sein
) has_vectors = getattr(vectors_cfg, "vectors", None) is not None
if not has_vectors:
recreate_edges = True
except Exception: except Exception:
# Index existiert evtl. schon → ignorieren # Wenn Metadaten nicht lesbar → sicherheitshalber neu anlegen
pass recreate_edges = True
else:
# Existiert noch nicht → wird gleich erstellt
pass
# Notes-Collection if recreate_edges and client.collection_exists(edges):
for f in ("note_id", "type", "status", "project", "area", "path", "tags"): client.delete_collection(edges)
_ensure_index(notes, f, rest.PayloadSchemaType.KEYWORD)
# Chunks-Collection if not client.collection_exists(edges):
for f in ("note_id", "type", "tags", "section_title", "section_path", "path", "chunk_index"): client.create_collection(
_ensure_index(chunks, f, rest.PayloadSchemaType.KEYWORD) collection_name=edges,
vectors_config=rest.VectorParams(size=1, distance=rest.Distance.DOT),
)
# Edges-Collection
for f in ("src_id", "dst_id", "edge_type", "scope"):
_ensure_index(edges, f, rest.PayloadSchemaType.KEYWORD)
return notes, chunks, edges # -------------------------------
# (Optionale) Utility-Funktionen
# -------------------------------
def collection_names(prefix: str) -> tuple[str, str, str]:
    """Return the notes, chunks and edges collection names for *prefix*.

    Convenience helper for callers that need all three names in one place.
    The result is ordered ``(notes, chunks, edges)``.
    """
    notes = f"{prefix}_notes"
    chunks = f"{prefix}_chunks"
    edges = f"{prefix}_edges"
    return notes, chunks, edges
def wipe_collections(client: QdrantClient, prefix: str) -> None:
    """Delete the notes, chunks and edges collections belonging to *prefix*.

    Collections that do not currently exist are skipped. This is
    destructive: only call it when you deliberately want to rebuild
    everything from scratch.
    """
    for collection in collection_names(prefix):
        if not client.collection_exists(collection):
            # Nothing to remove for this name.
            continue
        client.delete_collection(collection)