app/core/qdrant.py aktualisiert
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s
Some checks failed
Deploy mindnet to llm-node / deploy (push) Failing after 2s
This commit is contained in:
parent
6f32c60c47
commit
8f48701ea0
|
|
@ -2,30 +2,16 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Name: app/core/qdrant.py
|
||||
Version: v1.3.1 (2025-09-05)
|
||||
Version: v1.4.0 (2025-09-09)
|
||||
|
||||
Kurzbeschreibung:
|
||||
Qdrant-Client & Collection-Setup für mindnet.
|
||||
- Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren.
|
||||
- Edges-Collection nutzt 1D Dummy-Vektor (Workaround für Python-Client).
|
||||
- **Nicht-destruktiv per Default**: ensure_collections(..., destructive=False).
|
||||
- **Abwärtskompatibel**: collection_names(prefix) wieder verfügbar.
|
||||
- Edges-Collection nutzt 1D Dummy-Vektor.
|
||||
- NEW: ensure_payload_indexes(...) legt sinnvolle Payload-Indizes an.
|
||||
|
||||
Aufruf/Verwendung:
|
||||
from app.core.qdrant import QdrantConfig, get_client, ensure_collections, collection_names
|
||||
|
||||
Umgebungsvariablen (optional):
|
||||
QDRANT_URL | QDRANT_HOST/QDRANT_PORT, QDRANT_API_KEY,
|
||||
COLLECTION_PREFIX (Default "mindnet"), VECTOR_DIM (Default 384)
|
||||
|
||||
Änderungen:
|
||||
v1.3.1: Helper collection_names(prefix) wiederhergestellt (für reset_qdrant usw.).
|
||||
v1.3.0: ensure_collections(..., destructive=False) – keine stillen Drops im Dry-Run.
|
||||
Edges-Collection nur bei explizitem destructive=True neu anlegen.
|
||||
≤v1.2.x: Konnte {prefix}_edges bei fehlender VectorConfig automatisch neu erstellen (riskant).
|
||||
|
||||
Bezug/Quelle:
|
||||
Alte Core-Variante enthielt collection_names(prefix); diverse Scripts nutzen das weiterhin.
|
||||
Aufruf:
|
||||
from app.core.qdrant import QdrantConfig, get_client, ensure_collections, ensure_payload_indexes
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import os
|
||||
|
|
@ -36,10 +22,6 @@ from qdrant_client import QdrantClient
|
|||
from qdrant_client.http import models as rest
|
||||
|
||||
|
||||
# -------------------------------
|
||||
# Konfiguration
|
||||
# -------------------------------
|
||||
|
||||
@dataclass
|
||||
class QdrantConfig:
|
||||
url: str
|
||||
|
|
@ -60,18 +42,10 @@ class QdrantConfig:
|
|||
return QdrantConfig(url=url, api_key=api_key, prefix=prefix, dim=dim)
|
||||
|
||||
|
||||
# -------------------------------
|
||||
# Client
|
||||
# -------------------------------
|
||||
|
||||
def get_client(cfg: QdrantConfig) -> QdrantClient:
    """Build a ``QdrantClient`` from the URL and API key held in ``cfg``.

    Args:
        cfg: Configuration object providing ``url`` and ``api_key``.

    Returns:
        A new ``QdrantClient`` constructed with those two values.
    """
    connection = {"url": cfg.url, "api_key": cfg.api_key}
    return QdrantClient(**connection)
|
||||
|
||||
|
||||
# -------------------------------
|
||||
# Collection-Erzeuger (Hilfsfunktionen)
|
||||
# -------------------------------
|
||||
|
||||
def _create_notes(client: QdrantClient, name: str, dim: int) -> None:
|
||||
if not client.collection_exists(name):
|
||||
client.create_collection(
|
||||
|
|
@ -94,16 +68,7 @@ def _create_edges(client: QdrantClient, name: str) -> None:
|
|||
)
|
||||
|
||||
|
||||
# -------------------------------
|
||||
# Public API
|
||||
# -------------------------------
|
||||
|
||||
def ensure_collections(client: QdrantClient, prefix: str, dim: int, destructive: bool = False) -> None:
|
||||
"""
|
||||
Stellt sicher, dass die drei Collections existieren.
|
||||
- Default **nicht destruktiv**: vorhandene Collections bleiben unangetastet.
|
||||
- Nur wenn 'destructive=True', wird eine ungeeignete Edges-Collection gelöscht und neu angelegt.
|
||||
"""
|
||||
notes = f"{prefix}_notes"
|
||||
chunks = f"{prefix}_chunks"
|
||||
edges = f"{prefix}_edges"
|
||||
|
|
@ -112,41 +77,48 @@ def ensure_collections(client: QdrantClient, prefix: str, dim: int, destructive:
|
|||
_create_chunks(client, chunks, dim)
|
||||
|
||||
if client.collection_exists(edges):
|
||||
# Prüfen, ob die Edges-Collection bereits eine Vektorkonfig hat
|
||||
try:
|
||||
info = client.get_collection(edges)
|
||||
vectors_cfg = getattr(getattr(info.result, "config", None), "params", None)
|
||||
has_vectors = getattr(vectors_cfg, "vectors", None) is not None
|
||||
except Exception:
|
||||
# konservativ: nichts anfassen, um Datenverlust zu vermeiden
|
||||
has_vectors = True
|
||||
|
||||
if not has_vectors:
|
||||
if destructive:
|
||||
client.delete_collection(edges)
|
||||
_create_edges(client, edges)
|
||||
else:
|
||||
print(
|
||||
f"[ensure_collections] WARN: '{edges}' ohne VectorConfig gefunden; "
|
||||
f"keine destruktive Änderung (destructive=False).",
|
||||
flush=True,
|
||||
)
|
||||
print(f"[ensure_collections] WARN: '{edges}' ohne VectorConfig; destructive=False.", flush=True)
|
||||
else:
|
||||
_create_edges(client, edges)
|
||||
|
||||
|
||||
def collection_names(prefix: str) -> Tuple[str, str, str]:
    """Return the notes, chunks and edges collection names for ``prefix``.

    Backward-compatible helper for scripts.

    Args:
        prefix: Collection name prefix.

    Returns:
        The tuple ``(f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges")``.
    """
    notes, chunks, edges = (
        f"{prefix}_{suffix}" for suffix in ("notes", "chunks", "edges")
    )
    return notes, chunks, edges
|
||||
|
||||
|
||||
def wipe_collections(client: QdrantClient, prefix: str) -> None:
    """Delete all three ``{prefix}_*`` collections that currently exist.

    Only use this when a clean rebuild is deliberately wanted.

    Args:
        client: Client used for the existence checks and the deletions.
        prefix: Collection name prefix passed to ``collection_names``.
    """
    for target in collection_names(prefix):
        # Skip names that are not present; only existing collections are deleted.
        if not client.collection_exists(target):
            continue
        client.delete_collection(target)
|
||||
# -------------------------------
|
||||
# NEW: Payload-Indexing
|
||||
# -------------------------------
|
||||
|
||||
def _safe_create_index(client: QdrantClient, col: str, field: str, schema: rest.PayloadSchemaType) -> None:
    """Create a payload index on ``field`` of collection ``col``, best effort.

    Args:
        client: Client on which ``create_payload_index`` is called.
        col: Name of the collection to index.
        field: Name of the payload field to index.
        schema: Payload schema type, passed through as ``field_schema``.

    Returns:
        None. A failing ``create_payload_index`` call never propagates to the
        caller; it is logged as a warning instead.
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    import logging

    try:
        client.create_payload_index(
            collection_name=col,
            field_name=field,
            field_schema=schema,
        )
    except Exception as exc:
        # Still best effort (index may already exist or the schema may be
        # unsupported), but leave a trace so that real failures such as
        # connectivity or authentication problems stay diagnosable.
        logging.getLogger(__name__).warning(
            "create_payload_index skipped for %s.%s: %s", col, field, exc
        )
|
||||
|
||||
def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None:
    """Request payload indexes on the notes, chunks and edges collections.

    Collection names come from ``collection_names(prefix)``; every index is
    requested through ``_safe_create_index``.

    Args:
        client: Client handed on to ``_safe_create_index``.
        prefix: Collection name prefix.
    """
    notes_col, chunks_col, edges_col = collection_names(prefix)
    keyword = rest.PayloadSchemaType.KEYWORD
    integer = rest.PayloadSchemaType.INTEGER

    # (collection, field, schema) triples, listed in creation order.
    index_plan = [
        (notes_col, "note_id", keyword),
        (chunks_col, "note_id", keyword),
        (chunks_col, "chunk_index", integer),
    ]
    index_plan.extend(
        (edges_col, edge_field, keyword)
        for edge_field in ("kind", "scope", "source_id", "target_id", "note_id")
    )

    for col, field_name, schema in index_plan:
        _safe_create_index(client, col, field_name, schema)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user