#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ scripts/diag_payload_indexes.py Listet die Payload-Indizes (payload_schema) für alle Mindnet-Collections. Nutzt client.get_collection(name) und zeigt: - payload_schema (Feld -> Schema/Typ) - Vector-Konfiguration (size, distance, named vectors) - Segment-Infos (optional komprimiert) Aufruf: python3 -m scripts.diag_payload_indexes python3 -m scripts.diag_payload_indexes --raw # vollständiges JSON Hinweis: In Qdrant werden die Payload-Indizes im Collection-Info-Endpunkt unter `result.payload_schema` geliefert. """ from __future__ import annotations import argparse, json from qdrant_client.http import models as rest from app.core.qdrant import QdrantConfig, get_client, collection_names def compact_schema(ps: dict) -> dict: out = {} for k, v in (ps or {}).items(): # v ist z.B. {"type":"keyword"} oder {"type":"text"} etc. if isinstance(v, dict): out[k] = v.get("type") or v else: out[k] = v return out def get_info(client, name: str) -> dict: info = client.get_collection(collection_name=name) # Das SDK liefert .dict(). Wir extrahieren das Wesentliche. d = info.dict() if hasattr(info, "dict") else info result = d.get("result", {}) vectors = result.get("vectors") or {} if isinstance(vectors, dict) and "config" in vectors: vectors = vectors.get("config") payload_schema = result.get("payload_schema") or {} segments = result.get("segments_count") return { "name": name, "vectors": vectors, "payload_schema": compact_schema(payload_schema), "segments_count": segments, } def main(): ap = argparse.ArgumentParser() ap.add_argument("--raw", action="store_true", help="Gibt das rohe get_collection-Result zurück") args = ap.parse_args() cfg = QdrantConfig.from_env() client = get_client(cfg) notes, chunks, edges = collection_names(cfg.prefix) cols = [notes, chunks, edges] if args.raw: for name in cols: info = client.get_collection(collection_name=name) d = info.dict() if hasattr(info, "dict") else info print(json.dumps({"collection": name, "raw": d}, ensure_ascii=False)) else: out = [ get_info(client, name) for name in cols ] print(json.dumps({"prefix": cfg.prefix, "collections": out}, ensure_ascii=False, indent=2)) if __name__ == "__main__": main()