app/core/qdrant.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s

This commit is contained in:
Lars 2025-11-08 09:42:54 +01:00
parent 2041771b14
commit b241ae753b

View File

@ -2,21 +2,31 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Name: app/core/qdrant.py Name: app/core/qdrant.py
Version: v1.4.0 (2025-09-09) Version: v1.5.0 (2025-11-08)
Kurzbeschreibung: Kurzbeschreibung:
Qdrant-Client & Collection-Setup für mindnet. Qdrant-Client & Collection-Setup für mindnet.
- Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren. - Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren.
- Edges-Collection nutzt 1D Dummy-Vektor. - Edges-Collection nutzt 1D Dummy-Vektor.
- NEW: ensure_payload_indexes(...) legt sinnvolle Payload-Indizes an. - ensure_payload_indexes(...) legt sinnvolle Payload-Indizes an.
- **NEU (v1.5.0):** Abwärtskompatible Helfer:
* ensure_collections_for_prefix(...) – Wrapper für Legacy-Importer
* count_points(client, collection) – stabile Zählfunktion (mit Fallback)
* get_counts_for_prefix(...) – Summary über alle drei Collections
* truncate_collections(...) – löscht *alle Punkte* in den Collections
Aufruf: Aufruf:
from app.core.qdrant import QdrantConfig, get_client, ensure_collections, ensure_payload_indexes from app.core.qdrant import (
QdrantConfig, get_client,
ensure_collections, ensure_payload_indexes,
ensure_collections_for_prefix, count_points,
collection_names, get_counts_for_prefix, truncate_collections
)
""" """
from __future__ import annotations from __future__ import annotations
import os import os
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Tuple from typing import Optional, Tuple, Dict
from qdrant_client import QdrantClient from qdrant_client import QdrantClient
from qdrant_client.http import models as rest from qdrant_client.http import models as rest
@ -46,6 +56,9 @@ def get_client(cfg: QdrantConfig) -> QdrantClient:
return QdrantClient(url=cfg.url, api_key=cfg.api_key) return QdrantClient(url=cfg.url, api_key=cfg.api_key)
# -------------------------------
# Collection-Erstellung
# -------------------------------
def _create_notes(client: QdrantClient, name: str, dim: int) -> None: def _create_notes(client: QdrantClient, name: str, dim: int) -> None:
if not client.collection_exists(name): if not client.collection_exists(name):
client.create_collection( client.create_collection(
@ -98,9 +111,8 @@ def collection_names(prefix: str) -> Tuple[str, str, str]:
# ------------------------------- # -------------------------------
# NEW: Payload-Indexing # Payload-Indexing
# ------------------------------- # -------------------------------
def _safe_create_index(client: QdrantClient, col: str, field: str, schema: rest.PayloadSchemaType): def _safe_create_index(client: QdrantClient, col: str, field: str, schema: rest.PayloadSchemaType):
try: try:
client.create_payload_index( client.create_payload_index(
@ -119,6 +131,80 @@ def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None:
# Chunks # Chunks
_safe_create_index(client, chunks, "note_id", rest.PayloadSchemaType.KEYWORD) _safe_create_index(client, chunks, "note_id", rest.PayloadSchemaType.KEYWORD)
_safe_create_index(client, chunks, "chunk_index", rest.PayloadSchemaType.INTEGER) _safe_create_index(client, chunks, "chunk_index", rest.PayloadSchemaType.INTEGER)
_safe_create_index(client, chunks, "chunk_id", rest.PayloadSchemaType.KEYWORD)
# Edges # Edges
for f in ("kind", "scope", "source_id", "target_id", "note_id"): for f in ("kind", "scope", "source_id", "target_id", "note_id", "edge_id"):
_safe_create_index(client, edges, f, rest.PayloadSchemaType.KEYWORD) _safe_create_index(client, edges, f, rest.PayloadSchemaType.KEYWORD)
# -------------------------------
# NEU: Abwärtskompatible Helfer
# -------------------------------
def ensure_collections_for_prefix(
    client: QdrantClient,
    prefix: str,
    dim: int,
    destructive: bool = False,
) -> Tuple[str, str, str]:
    """Set up collections and payload indexes for ``prefix`` and return the names.

    Backward-compatible entry point kept so that older scripts (the original
    note names importers up to v3.7.x) keep working.

    Args:
        client: Qdrant client used for all calls.
        prefix: Collection name prefix.
        dim: Vector dimension, forwarded to ``ensure_collections``.
        destructive: Forwarded unchanged to ``ensure_collections``; its
            effect is defined there.

    Returns:
        The three collection names exactly as ``collection_names(prefix)``
        returns them.
    """
    ensure_collections(client, prefix, dim, destructive=destructive)
    ensure_payload_indexes(client, prefix)
    names = collection_names(prefix)
    return names
def count_points(client: QdrantClient, collection: str) -> int:
    """Return the number of points stored in ``collection``.

    Strategy:
      1. Ask the server via ``client.count(..., exact=True)``. The result is
         accepted either as an object with an integer ``count`` attribute or
         as a dict containing a ``"count"`` key.
      2. If that call raises, or its result has neither shape, page through
         the collection with ``client.scroll`` (no filter, no payloads, no
         vectors) and add up the page sizes.

    Args:
        client: Qdrant client (anything offering ``count`` and ``scroll``).
        collection: Name of the collection to count.

    Returns:
        The point count as an ``int``.

    Raises:
        Whatever ``client.scroll`` raises; only errors from the ``count``
        attempt are suppressed.
    """
    try:
        response = client.count(collection_name=collection, count_filter=None, exact=True)
        # Object-style response exposing ``.count``.
        attr_value = getattr(response, "count", None)
        if isinstance(attr_value, int):
            return attr_value
        # Dict-style response.
        if isinstance(response, dict) and "count" in response:
            return int(response["count"])
    except Exception:
        # Deliberate: any failure here just switches to the scroll fallback.
        pass

    # Fallback: walk all pages and sum their lengths.
    seen = 0
    cursor = None
    keep_going = True
    while keep_going:
        batch, cursor = client.scroll(
            collection_name=collection,
            limit=2048,
            with_payload=False,
            with_vectors=False,
            offset=cursor,
        )
        seen += len(batch)
        # Stop when there is no next-page offset or the page came back empty.
        keep_going = cursor is not None and bool(batch)
    return seen
def get_counts_for_prefix(client: QdrantClient, prefix: str) -> Dict[str, int]:
    """Return the point counts of the three collections belonging to ``prefix``.

    Args:
        client: Qdrant client passed through to ``count_points``.
        prefix: Collection name prefix resolved via ``collection_names``.

    Returns:
        A dict with the keys ``"notes"``, ``"chunks"`` and ``"edges"``, each
        mapped to the count of the corresponding collection.
    """
    notes_col, chunks_col, edges_col = collection_names(prefix)
    summary: Dict[str, int] = {}
    # Counted in the order notes -> chunks -> edges.
    for label, col in (("notes", notes_col), ("chunks", chunks_col), ("edges", edges_col)):
        summary[label] = count_points(client, col)
    return summary
def truncate_collections(client: QdrantClient, prefix: str) -> None:
    """Delete all points from every collection of ``prefix``; keep the collections.

    Functionally a "truncate", as used by a reset script. The operation is
    best-effort: a failure on one collection does not abort the others and
    is not raised to the caller. Unlike before, such a failure is no longer
    silent but reported as a warning, so an incomplete reset is visible in
    the logs.

    Args:
        client: Qdrant client used to issue the delete requests.
        prefix: Collection name prefix resolved via ``collection_names``.

    Returns:
        None.
    """
    import logging  # local import so the block does not depend on file-level imports

    log = logging.getLogger(__name__)
    for col in collection_names(prefix):
        try:
            client.delete(
                collection_name=col,
                points_selector=rest.FilterSelector(
                    filter=rest.Filter(must=[])  # empty filter => selects all points
                ),
                wait=True,
            )
        except Exception as exc:
            # Still best-effort (the collection may be empty or missing), but
            # leave a trace instead of swallowing the error.
            log.warning(
                "truncate_collections: deleting points from %r failed, skipping: %s",
                col,
                exc,
            )