app/core/qdrant.py aktualisiert
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s

This commit is contained in:
Lars 2025-09-09 11:51:52 +02:00
parent 6f32c60c47
commit 8f48701ea0

View File

@ -2,30 +2,16 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Name: app/core/qdrant.py Name: app/core/qdrant.py
Version: v1.3.1 (2025-09-05) Version: v1.4.0 (2025-09-09)
Kurzbeschreibung: Kurzbeschreibung:
Qdrant-Client & Collection-Setup für mindnet. Qdrant-Client & Collection-Setup für mindnet.
- Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren. - Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren.
- Edges-Collection nutzt 1D Dummy-Vektor (Workaround für Python-Client). - Edges-Collection nutzt 1D Dummy-Vektor.
- **Nicht-destruktiv per Default**: ensure_collections(..., destructive=False). - NEW: ensure_payload_indexes(...) legt sinnvolle Payload-Indizes an.
- **Abwärtskompatibel**: collection_names(prefix) wieder verfügbar.
Aufruf/Verwendung: Aufruf:
from app.core.qdrant import QdrantConfig, get_client, ensure_collections, collection_names from app.core.qdrant import QdrantConfig, get_client, ensure_collections, ensure_payload_indexes
Umgebungsvariablen (optional):
QDRANT_URL | QDRANT_HOST/QDRANT_PORT, QDRANT_API_KEY,
COLLECTION_PREFIX (Default "mindnet"), VECTOR_DIM (Default 384)
Änderungen:
v1.3.1: Helper collection_names(prefix) wiederhergestellt (für reset_qdrant usw.).
v1.3.0: ensure_collections(..., destructive=False) – keine stillen Drops im Dry-Run.
Edges-Collection nur bei explicit destructive=True neu anlegen.
v1.2.x: Konnte {prefix}_edges bei fehlender VectorConfig automatisch neu erstellen (riskant).
Bezug/Quelle:
Alte Core-Variante enthielt collection_names(prefix); diverse Scripts nutzen das weiterhin.
""" """
from __future__ import annotations from __future__ import annotations
import os import os
@ -36,10 +22,6 @@ from qdrant_client import QdrantClient
from qdrant_client.http import models as rest from qdrant_client.http import models as rest
# -------------------------------
# Konfiguration
# -------------------------------
@dataclass @dataclass
class QdrantConfig: class QdrantConfig:
url: str url: str
@ -60,18 +42,10 @@ class QdrantConfig:
return QdrantConfig(url=url, api_key=api_key, prefix=prefix, dim=dim) return QdrantConfig(url=url, api_key=api_key, prefix=prefix, dim=dim)
# -------------------------------
# Client
# -------------------------------
def get_client(cfg: QdrantConfig) -> QdrantClient: def get_client(cfg: QdrantConfig) -> QdrantClient:
return QdrantClient(url=cfg.url, api_key=cfg.api_key) return QdrantClient(url=cfg.url, api_key=cfg.api_key)
# -------------------------------
# Collection-Erzeuger (Hilfsfunktionen)
# -------------------------------
def _create_notes(client: QdrantClient, name: str, dim: int) -> None: def _create_notes(client: QdrantClient, name: str, dim: int) -> None:
if not client.collection_exists(name): if not client.collection_exists(name):
client.create_collection( client.create_collection(
@ -94,16 +68,7 @@ def _create_edges(client: QdrantClient, name: str) -> None:
) )
# -------------------------------
# Public API
# -------------------------------
def ensure_collections(client: QdrantClient, prefix: str, dim: int, destructive: bool = False) -> None: def ensure_collections(client: QdrantClient, prefix: str, dim: int, destructive: bool = False) -> None:
"""
Stellt sicher, dass die drei Collections existieren.
- Default **nicht destruktiv**: vorhandene Collections bleiben unangetastet.
- Nur wenn 'destructive=True', wird eine ungeeignete Edges-Collection gelöscht und neu angelegt.
"""
notes = f"{prefix}_notes" notes = f"{prefix}_notes"
chunks = f"{prefix}_chunks" chunks = f"{prefix}_chunks"
edges = f"{prefix}_edges" edges = f"{prefix}_edges"
@ -112,41 +77,48 @@ def ensure_collections(client: QdrantClient, prefix: str, dim: int, destructive:
_create_chunks(client, chunks, dim) _create_chunks(client, chunks, dim)
if client.collection_exists(edges): if client.collection_exists(edges):
# Prüfen, ob die Edges-Collection bereits eine Vektorkonfig hat
try: try:
info = client.get_collection(edges) info = client.get_collection(edges)
vectors_cfg = getattr(getattr(info.result, "config", None), "params", None) vectors_cfg = getattr(getattr(info.result, "config", None), "params", None)
has_vectors = getattr(vectors_cfg, "vectors", None) is not None has_vectors = getattr(vectors_cfg, "vectors", None) is not None
except Exception: except Exception:
# konservativ: nichts anfassen, um Datenverlust zu vermeiden
has_vectors = True has_vectors = True
if not has_vectors: if not has_vectors:
if destructive: if destructive:
client.delete_collection(edges) client.delete_collection(edges)
_create_edges(client, edges) _create_edges(client, edges)
else: else:
print( print(f"[ensure_collections] WARN: '{edges}' ohne VectorConfig; destructive=False.", flush=True)
f"[ensure_collections] WARN: '{edges}' ohne VectorConfig gefunden; "
f"keine destruktive Änderung (destructive=False).",
flush=True,
)
else: else:
_create_edges(client, edges) _create_edges(client, edges)
def collection_names(prefix: str) -> Tuple[str, str, str]: def collection_names(prefix: str) -> Tuple[str, str, str]:
"""
Abwärtskompatibler Helper für Scripts:
returns (f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges")
"""
return (f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges") return (f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges")
def wipe_collections(client: QdrantClient, prefix: str) -> None: # -------------------------------
""" # NEW: Payload-Indexing
Löscht alle drei Collections – nur verwenden, wenn bewusst ein Clean-Rebuild gewünscht ist. # -------------------------------
"""
for name in collection_names(prefix): def _safe_create_index(client: QdrantClient, col: str, field: str, schema: rest.PayloadSchemaType):
if client.collection_exists(name): try:
client.delete_collection(name) client.create_payload_index(
collection_name=col,
field_name=field,
field_schema=schema,
)
except Exception:
# bereits vorhanden oder nicht unterstütztes Schema → ignorieren
pass
def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None:
notes, chunks, edges = collection_names(prefix)
# Notes
_safe_create_index(client, notes, "note_id", rest.PayloadSchemaType.KEYWORD)
# Chunks
_safe_create_index(client, chunks, "note_id", rest.PayloadSchemaType.KEYWORD)
_safe_create_index(client, chunks, "chunk_index", rest.PayloadSchemaType.INTEGER)
# Edges
for f in ("kind", "scope", "source_id", "target_id", "note_id"):
_safe_create_index(client, edges, f, rest.PayloadSchemaType.KEYWORD)