app/core/qdrant.py aktualisiert
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s
This commit is contained in:
parent
3dcc9274fc
commit
98f719e42f
|
|
@ -1,85 +1,120 @@
|
||||||
|
# app/core/qdrant.py
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import Tuple
|
|
||||||
import os
|
import os
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from qdrant_client import QdrantClient
|
from qdrant_client import QdrantClient
|
||||||
from qdrant_client.http import models as rest
|
from qdrant_client.http import models as rest
|
||||||
|
|
||||||
DEFAULT_DIM = int(os.getenv("VECTOR_DIM", "384"))
|
|
||||||
|
# -------------------------------
|
||||||
|
# Konfiguration
|
||||||
|
# -------------------------------
|
||||||
|
|
||||||
@dataclass
class QdrantConfig:
    """Connection and naming settings for the Qdrant vector store.

    Only ``url`` is required. The other fields default to the same fallback
    values that :meth:`from_env` uses when the corresponding environment
    variable is missing or empty.
    """

    # Base URL of the Qdrant endpoint, e.g. "http://127.0.0.1:6333".
    url: str
    # API key handed to the client; None means no key is configured.
    api_key: Optional[str] = None
    # Collection name prefix (COLLECTION_PREFIX in the environment).
    prefix: str = "mindnet"
    # Vector dimension (VECTOR_DIM in the environment); MiniLM-384 by default.
    dim: int = 384

    @classmethod
    def from_env(cls) -> "QdrantConfig":
        """Build a configuration from environment variables.

        Variables read (an empty value is treated like an unset one):
            QDRANT_URL: full endpoint URL; preferred when present.
            QDRANT_HOST, QDRANT_PORT: used to build ``http://host:port``
                when QDRANT_URL is absent (fallback ``127.0.0.1:6333``).
            QDRANT_API_KEY: optional API key.
            COLLECTION_PREFIX: collection name prefix.
            VECTOR_DIM: vector dimension.

        Returns:
            A new configuration instance.

        Raises:
            ValueError: if QDRANT_PORT or VECTOR_DIM is set to a value that
                is not an integer.
        """
        # URL (preferred) or host/port.
        url = os.getenv("QDRANT_URL")
        if not url:
            host = os.getenv("QDRANT_HOST") or "127.0.0.1"
            port = int(os.getenv("QDRANT_PORT") or "6333")
            url = f"http://{host}:{port}"
        api_key = os.getenv("QDRANT_API_KEY") or None

        # Collection prefix and vector dimension. ``cls.prefix`` / ``cls.dim``
        # are the dataclass field defaults declared above, so the fallbacks
        # are defined in exactly one place.
        prefix = os.getenv("COLLECTION_PREFIX") or cls.prefix
        dim = int(os.getenv("VECTOR_DIM") or cls.dim)

        return cls(url=url, api_key=api_key, prefix=prefix, dim=dim)
|
||||||
|
|
||||||
|
|
||||||
|
# -------------------------------
|
||||||
|
# Client / Setup
|
||||||
|
# -------------------------------
|
||||||
|
|
||||||
def get_client(cfg: QdrantConfig) -> QdrantClient:
    """Create a QdrantClient for the endpoint described by ``cfg``.

    Args:
        cfg: Configuration providing ``url`` and ``api_key``.

    Returns:
        A client constructed with ``cfg.url`` and the API key.
    """
    # An empty key is passed on as None, so a config built by hand with
    # api_key="" behaves like one without a key.
    return QdrantClient(url=cfg.url, api_key=cfg.api_key or None)
|
||||||
|
|
||||||
def ensure_collections(client: QdrantClient, prefix: str, dim: int) -> None:
    """Make sure the three collections for ``prefix`` exist.

    Collections:
        {prefix}_notes:  vector size ``dim``, COSINE distance.
        {prefix}_chunks: vector size ``dim``, COSINE distance.
        {prefix}_edges:  vector size 1, DOT distance. This is a dummy vector,
            kept as a workaround so the Python client does not insist on a
            real vector for edge points.

    Existing notes and chunks collections are left untouched. An existing
    edges collection is deleted and recreated only when it reports no vector
    configuration at all.

    Args:
        client: Client used for all collection calls.
        prefix: Collection name prefix.
        dim: Vector size for the notes and chunks collections.

    Raises:
        Whatever the client raises. In particular, a failure to read the
        edges collection's metadata propagates instead of triggering a
        delete, so a transient error cannot cost the stored edges.
    """
    notes = f"{prefix}_notes"
    chunks = f"{prefix}_chunks"
    edges = f"{prefix}_edges"

    # Notes and chunks share the same vector layout.
    for name in (notes, chunks):
        if not client.collection_exists(name):
            client.create_collection(
                collection_name=name,
                vectors_config=rest.VectorParams(size=dim, distance=rest.Distance.COSINE),
            )

    # Edges: 1-D dummy vector (workaround).
    edges_present = client.collection_exists(edges)
    if edges_present:
        # get_collection returns the collection info itself; the vector
        # settings live at info.config.params.vectors. They may be a single
        # VectorParams or a mapping of named vectors; None or an empty
        # mapping means the collection was created without vectors.
        info = client.get_collection(edges)
        if not info.config.params.vectors:
            client.delete_collection(edges)
            edges_present = False

    if not edges_present:
        client.create_collection(
            collection_name=edges,
            vectors_config=rest.VectorParams(size=1, distance=rest.Distance.DOT),
        )
|
# -------------------------------
|
||||||
|
# (Optionale) Utility-Funktionen
|
||||||
|
# -------------------------------
|
||||||
|
|
||||||
|
def collection_names(prefix: str) -> tuple[str, str, str]:
    """Return the notes, chunks and edges collection names for ``prefix``.

    Args:
        prefix: Collection name prefix.

    Returns:
        ``(f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges")``.
    """
    return (f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges")
|
||||||
|
|
||||||
|
|
||||||
|
def wipe_collections(client: QdrantClient, prefix: str) -> None:
    """Delete all three collections belonging to ``prefix``.

    Use this only when you deliberately want to start from scratch.
    Collections that do not exist are skipped.

    Args:
        client: Client used for the existence checks and deletions.
        prefix: Collection name prefix.
    """
    for name in collection_names(prefix):
        if client.collection_exists(name):
            client.delete_collection(name)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user