From b241ae753b25de6822b0549d0a651f6c5d5b7010 Mon Sep 17 00:00:00 2001 From: Lars Date: Sat, 8 Nov 2025 09:42:54 +0100 Subject: [PATCH] app/core/qdrant.py aktualisiert --- app/core/qdrant.py | 100 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 93 insertions(+), 7 deletions(-) diff --git a/app/core/qdrant.py b/app/core/qdrant.py index 8cea113..d248c09 100644 --- a/app/core/qdrant.py +++ b/app/core/qdrant.py @@ -2,21 +2,31 @@ # -*- coding: utf-8 -*- """ Name: app/core/qdrant.py -Version: v1.4.0 (2025-09-09) +Version: v1.5.0 (2025-11-08) Kurzbeschreibung: Qdrant-Client & Collection-Setup für mindnet. - Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren. - Edges-Collection nutzt 1D Dummy-Vektor. - - NEW: ensure_payload_indexes(...) legt sinnvolle Payload-Indizes an. + - ensure_payload_indexes(...) legt sinnvolle Payload-Indizes an. + - **NEU (v1.5.0):** Abwärtskompatible Helfer: + * ensure_collections_for_prefix(...) → Wrapper für legacy-Importer + * count_points(client, collection) → stabile Zählfunktion (mit Fallback) + * get_counts_for_prefix(...) → Summary über alle drei Collections + * truncate_collections(...) → löscht *alle Punkte* in den Collections Aufruf: - from app.core.qdrant import QdrantConfig, get_client, ensure_collections, ensure_payload_indexes + from app.core.qdrant import ( + QdrantConfig, get_client, + ensure_collections, ensure_payload_indexes, + ensure_collections_for_prefix, count_points, + collection_names, get_counts_for_prefix, truncate_collections + ) """ from __future__ import annotations import os from dataclasses import dataclass -from typing import Optional, Tuple +from typing import Optional, Tuple, Dict from qdrant_client import QdrantClient from qdrant_client.http import models as rest @@ -46,6 +56,9 @@ def get_client(cfg: QdrantConfig) -> QdrantClient: return QdrantClient(url=cfg.url, api_key=cfg.api_key) +# ------------------------------- +# Collection-Erstellung +# ------------------------------- def _create_notes(client: QdrantClient, name: str, dim: int) -> None: if not client.collection_exists(name): client.create_collection( @@ -98,9 +111,8 @@ def collection_names(prefix: str) -> Tuple[str, str, str]: # ------------------------------- -# NEW: Payload-Indexing +# Payload-Indexing # ------------------------------- - def _safe_create_index(client: QdrantClient, col: str, field: str, schema: rest.PayloadSchemaType): try: client.create_payload_index( @@ -119,6 +131,80 @@ def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None: # Chunks _safe_create_index(client, chunks, "note_id", rest.PayloadSchemaType.KEYWORD) _safe_create_index(client, chunks, "chunk_index", rest.PayloadSchemaType.INTEGER) + _safe_create_index(client, chunks, "chunk_id", rest.PayloadSchemaType.KEYWORD) # Edges - for f in ("kind", "scope", "source_id", "target_id", "note_id"): + for f in ("kind", "scope", "source_id", "target_id", "note_id", "edge_id"): _safe_create_index(client, edges, f, rest.PayloadSchemaType.KEYWORD) + + +# ------------------------------- +# NEU: Abwärtskompatible Helfer +# ------------------------------- +def ensure_collections_for_prefix(client: QdrantClient, prefix: str, dim: int, destructive: bool = False) -> Tuple[str, str, str]: + """ + Legacy-Wrapper, damit ältere Skripte (Importer bis v3.7.x) funktionieren. + Gibt die Collection-Namen zurück. + """ + ensure_collections(client, prefix, dim, destructive=destructive) + ensure_payload_indexes(client, prefix) + return collection_names(prefix) + +def count_points(client: QdrantClient, collection: str) -> int: + """ + Zähle Punkte in einer Collection robust: + 1) bevorzugt client.count(..., exact=True) + 2) Fallback: Scrollen ohne Filter und mitzählen + """ + try: + res = client.count(collection_name=collection, count_filter=None, exact=True) + # qdrant_client >=1.7: res.count + cnt = getattr(res, "count", None) + if isinstance(cnt, int): + return cnt + # ältere Clients liefern evtl. ein Dict + if isinstance(res, dict) and "count" in res: + return int(res["count"]) + except Exception: + pass + + # Fallback via Scroll + total = 0 + next_page = None + while True: + points, next_page = client.scroll( + collection_name=collection, + limit=2048, + with_payload=False, + with_vectors=False, + offset=next_page, + ) + total += len(points) + if next_page is None or not points: + break + return total + +def get_counts_for_prefix(client: QdrantClient, prefix: str) -> Dict[str, int]: + notes, chunks, edges = collection_names(prefix) + return { + "notes": count_points(client, notes), + "chunks": count_points(client, chunks), + "edges": count_points(client, edges), + } + +def truncate_collections(client: QdrantClient, prefix: str) -> None: + """ + Löscht *alle Punkte* (nicht die Collections selber) für {prefix}. + Entspricht funktional einem "truncate" in deinem Reset-Skript. + """ + for col in collection_names(prefix): + try: + client.delete( + collection_name=col, + points_selector=rest.FilterSelector( + filter=rest.Filter(must=[]) # leeres Filter => alle Punkte + ), + wait=True, + ) + except Exception: + # Fallback: Collection ggf. leer/nicht vorhanden → ignorieren + pass