# mindnet/scripts/debug_qdrant_state.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
FILE: scripts/debug_qdrant_state.py
VERSION: 2.1.0 (2025-12-15)
STATUS: Active (Debug-Tool)
COMPATIBILITY: v2.9.1 (Post-WP14/WP-15b)

Zweck:
-------
Zeigt schnelle Übersicht über den Qdrant-Zustand.
Gibt Prefix, Collections, Punkt-Anzahlen und Beispiel-IDs aus.

Funktionsweise:
---------------
1. Verbindet mit Qdrant
2. Ermittelt Collections für das Präfix
3. Zählt Points in jeder Collection
4. Extrahiert Beispiel-IDs (erste 5)

Ergebnis-Interpretation:
------------------------
- Ausgabe: JSON mit Zustands-Übersicht
  * prefix: Collection-Präfix
  * collections: Namen der Collections
  * counts: Punkt-Anzahlen pro Collection
  * samples: Beispiel-IDs pro Collection

Verwendung:
-----------
- Schnelle Status-Prüfung
- Debugging von Verbindungsproblemen
- Validierung der Collection-Struktur

Hinweise:
---------
- Nutzt count() für exakte Zählung (falls verfügbar)
- Fallback auf scroll() bei Problemen

Aufruf:
-------
python3 -m scripts.debug_qdrant_state
python3 -m scripts.debug_qdrant_state --prefix mindnet_dev

Parameter:
----------
--prefix TEXT   Collection-Präfix (Default: ENV COLLECTION_PREFIX oder mindnet)

Änderungen:
-----------
v2.1.0 (2025-12-15): Dokumentation aktualisiert
v1.0.0: Initial Release
"""
from __future__ import annotations
import argparse, os, json
from app.core.qdrant import QdrantConfig, get_client

def count_points(client, collection: str, page_size: int = 256) -> int:
    """Return the number of points stored in ``collection``.

    ``client.count(collection, exact=True)`` is tried first. If that call
    raises, the collection is walked page by page with ``client.scroll`` and
    the returned points are tallied, so the fallback reports a real count
    instead of a 0/1 "has any points" flag.

    Args:
        client: Client object exposing ``count`` and ``scroll``.
        collection: Name of the collection to inspect.
        page_size: Number of points requested per ``scroll`` page in the
            fallback path.

    Returns:
        The point count. 0 if the count result carries no ``count``
        attribute or the collection is empty.

    Raises:
        Exception: Whatever ``client.scroll`` raises when the fallback
            fails as well (e.g. the collection does not exist).
    """
    try:
        res = client.count(collection, exact=True)
        return int(getattr(res, "count", 0))
    except Exception:
        # Deliberately broad: this is a best-effort debug helper and the
        # concrete exception types depend on the client implementation.
        pass

    total = 0
    offset = None
    while True:
        # Payloads and vectors are not needed for counting; skip them to keep
        # the pages small.
        pts, offset = client.scroll(
            collection,
            limit=page_size,
            offset=offset,
            with_payload=False,
            with_vectors=False,
        )
        total += len(pts or [])
        # scroll() signals the last page with a None offset; the empty-page
        # check guards against a client that keeps returning an offset.
        if offset is None or not pts:
            return total

def sample_ids(client, collection: str, id_key: str, limit: int = 5):
    """Collect example values of ``id_key`` from the first points of a collection.

    Fetches up to ``limit`` points (with payload, without vectors) via
    ``client.scroll`` and returns the payload value stored under ``id_key``
    for every point that has that key. Points with a missing/empty payload or
    without the key are skipped, so fewer than ``limit`` values may come back.

    Args:
        client: Client object exposing ``scroll``.
        collection: Name of the collection to sample.
        id_key: Payload key whose values are collected.
        limit: Maximum number of points to fetch.

    Returns:
        List of payload values in the order the points were returned.
    """
    records, _next_offset = client.scroll(
        collection, with_payload=True, with_vectors=False, limit=limit
    )
    # Normalise a missing payload to an empty dict so the key test is uniform.
    payloads = ((record.payload or {}) for record in records or [])
    return [payload[id_key] for payload in payloads if id_key in payload]

def main():
    """Print a JSON overview of the Qdrant collections for one prefix.

    Reads ``--prefix`` from the command line (default: environment variable
    ``COLLECTION_PREFIX``, else ``"mindnet"``), builds the client from the
    environment-based config, and prints the prefix, the three collection
    names (notes, chunks, edges), their point counts and sample ``note_id``
    values as indented JSON on stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--prefix", default=os.environ.get("COLLECTION_PREFIX", "mindnet"))
    options = parser.parse_args()

    # The command-line prefix overrides whatever the environment config holds.
    config = QdrantConfig.from_env()
    config.prefix = options.prefix
    qdrant = get_client(config)

    collections = {
        "notes": f"{config.prefix}_notes",
        "chunks": f"{config.prefix}_chunks",
        "edges": f"{config.prefix}_edges",
    }

    # All counts are gathered before any samples, in notes/chunks/edges order.
    counts = {
        kind: count_points(qdrant, name) for kind, name in collections.items()
    }
    samples = {
        f"{kind}.note_id": sample_ids(qdrant, name, "note_id")
        for kind, name in collections.items()
    }

    report = {
        "prefix": config.prefix,
        "collections": collections,
        "counts": counts,
        "samples": samples,
    }
    print(json.dumps(report, ensure_ascii=False, indent=2))

# Run the report only when executed as a script/module entry point, not on import.
if __name__ == "__main__":
    main()