diff --git a/app/core/qdrant.py b/app/core/qdrant.py index d248c09..2bf7766 100644 --- a/app/core/qdrant.py +++ b/app/core/qdrant.py @@ -2,31 +2,37 @@ # -*- coding: utf-8 -*- """ Name: app/core/qdrant.py -Version: v1.5.0 (2025-11-08) +Version: v1.6.0 (2025-11-08) Kurzbeschreibung: Qdrant-Client & Collection-Setup für mindnet. - Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren. - Edges-Collection nutzt 1D Dummy-Vektor. - ensure_payload_indexes(...) legt sinnvolle Payload-Indizes an. - - **NEU (v1.5.0):** Abwärtskompatible Helfer: - * ensure_collections_for_prefix(...) → Wrapper für legacy-Importer - * count_points(client, collection) → stabile Zählfunktion (mit Fallback) - * get_counts_for_prefix(...) → Summary über alle drei Collections - * truncate_collections(...) → löscht *alle Punkte* in den Collections + +NEU / Änderungen: + v1.5.0: + * ensure_collections_for_prefix(...) → Wrapper für legacy-Importer + * count_points(client, collection) → stabile Zählfunktion (mit Fallback) + * get_counts_for_prefix(...) → Summary über alle drei Collections + * truncate_collections(...) 
→ löscht *alle Punkte* in den Collections + v1.6.0: + * list_note_ids(client, notes_collection) → liefert alle payload.note_id-Werte + (wird von import_markdown.py v3.9.0 erwartet) Aufruf: from app.core.qdrant import ( QdrantConfig, get_client, ensure_collections, ensure_payload_indexes, ensure_collections_for_prefix, count_points, - collection_names, get_counts_for_prefix, truncate_collections + collection_names, get_counts_for_prefix, truncate_collections, + list_note_ids, ) """ from __future__ import annotations import os from dataclasses import dataclass -from typing import Optional, Tuple, Dict +from typing import Optional, Tuple, Dict, List from qdrant_client import QdrantClient from qdrant_client.http import models as rest @@ -66,6 +72,7 @@ def _create_notes(client: QdrantClient, name: str, dim: int) -> None: vectors_config=rest.VectorParams(size=dim, distance=rest.Distance.COSINE), ) + def _create_chunks(client: QdrantClient, name: str, dim: int) -> None: if not client.collection_exists(name): client.create_collection( @@ -73,6 +80,7 @@ def _create_chunks(client: QdrantClient, name: str, dim: int) -> None: vectors_config=rest.VectorParams(size=dim, distance=rest.Distance.COSINE), ) + def _create_edges(client: QdrantClient, name: str) -> None: if not client.collection_exists(name): client.create_collection( @@ -124,6 +132,7 @@ def _safe_create_index(client: QdrantClient, col: str, field: str, schema: rest. 
# -------------------------------
# NEW in v1.6.0: list_note_ids
# -------------------------------
def list_note_ids(client: QdrantClient, notes_collection: str, limit: int = 100000) -> List[str]:
    """
    Return the distinct ``payload.note_id`` values stored in a notes collection.

    Used by import_markdown.py (>= v3.9.0), e.g. for baseline / idempotency checks.
    Points whose payload is missing, or whose ``note_id`` is absent or not a
    string, are skipped.

    Args:
        client: Qdrant client; only its ``scroll`` method is called.
        notes_collection: Name of the notes collection (e.g. 'mindnet_notes').
        limit: Hard upper bound on the number of IDs returned. A value <= 0
            yields an empty list without querying Qdrant.

    Returns:
        De-duplicated note_id strings. They appear in the order the scroll
        delivered them, which callers should not rely on.
    """
    # A non-positive limit means "nothing wanted"; without this guard the
    # loop below would still fetch one point and return its ID.
    if limit <= 0:
        return []

    page_size = 512
    note_ids: List[str] = []
    seen = set()
    offset = None

    while True:
        points, offset = client.scroll(
            collection_name=notes_collection,
            scroll_filter=None,  # no filter -> walk every point
            # Never ask for more than is still missing; always >= 1 here
            # because we return as soon as the limit is reached.
            limit=min(page_size, limit - len(note_ids)),
            # Only the note_id key is read, so avoid shipping full payloads.
            with_payload=["note_id"],
            with_vectors=False,
            offset=offset,
        )
        if not points:
            break

        for point in points:
            note_id = (point.payload or {}).get("note_id")
            if not isinstance(note_id, str) or note_id in seen:
                continue
            seen.add(note_id)
            note_ids.append(note_id)
            if len(note_ids) >= limit:
                return note_ids

        # Qdrant signals the last page with a None offset.
        if offset is None:
            break

    return note_ids