From eafa0fad859c38e66f1eea451d719005f88a5d47 Mon Sep 17 00:00:00 2001 From: Lars Date: Sun, 16 Nov 2025 17:36:09 +0100 Subject: [PATCH] Dateien nach "tests" hochladen --- tests/diag_payload_indexes.py | 73 +++++++++++++------------------- tests/ensure_indexes_and_show.py | 18 +++----- 2 files changed, 36 insertions(+), 55 deletions(-) diff --git a/tests/diag_payload_indexes.py b/tests/diag_payload_indexes.py index 204b891..1418e62 100644 --- a/tests/diag_payload_indexes.py +++ b/tests/diag_payload_indexes.py @@ -1,56 +1,31 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -scripts/diag_payload_indexes.py -Listet die Payload-Indizes (payload_schema) für alle Mindnet-Collections. +scripts/diag_payload_indexes.py (v1.1) -Nutzt client.get_collection(name) und zeigt: -- payload_schema (Feld -> Schema/Typ) -- Vector-Konfiguration (size, distance, named vectors) -- Segment-Infos (optional komprimiert) - -Aufruf: - python3 -m scripts.diag_payload_indexes - python3 -m scripts.diag_payload_indexes --raw # vollständiges JSON - -Hinweis: In Qdrant werden die Payload-Indizes im Collection-Info-Endpunkt unter -`result.payload_schema` geliefert. +Zeigt payload_schema (Indizes) je Collection. +WICHTIG: Einige Qdrant-Versionen liefern payload_schema nur, wenn +`with_payload_schema=true` gesetzt wird. Daher setzen wir das Flag explizit. """ from __future__ import annotations import argparse, json from qdrant_client.http import models as rest from app.core.qdrant import QdrantConfig, get_client, collection_names -def compact_schema(ps: dict) -> dict: +def compact_schema(ps: dict | None) -> dict: + if not isinstance(ps, dict): + return {} out = {} - for k, v in (ps or {}).items(): - # v ist z.B. {"type":"keyword"} oder {"type":"text"} etc. - if isinstance(v, dict): - out[k] = v.get("type") or v + for k, v in ps.items(): + if isinstance(v, dict) and "type" in v: + out[k] = v["type"] else: out[k] = v return out -def get_info(client, name: str) -> dict: - info = client.get_collection(collection_name=name) - # Das SDK liefert .dict(). Wir extrahieren das Wesentliche. - d = info.dict() if hasattr(info, "dict") else info - result = d.get("result", {}) - vectors = result.get("vectors") or {} - if isinstance(vectors, dict) and "config" in vectors: - vectors = vectors.get("config") - payload_schema = result.get("payload_schema") or {} - segments = result.get("segments_count") - return { - "name": name, - "vectors": vectors, - "payload_schema": compact_schema(payload_schema), - "segments_count": segments, - } - def main(): ap = argparse.ArgumentParser() - ap.add_argument("--raw", action="store_true", help="Gibt das rohe get_collection-Result zurück") + ap.add_argument("--raw", action="store_true") args = ap.parse_args() cfg = QdrantConfig.from_env() @@ -58,14 +33,24 @@ def main(): notes, chunks, edges = collection_names(cfg.prefix) cols = [notes, chunks, edges] - if args.raw: - for name in cols: - info = client.get_collection(collection_name=name) - d = info.dict() if hasattr(info, "dict") else info - print(json.dumps({"collection": name, "raw": d}, ensure_ascii=False)) - else: - out = [ get_info(client, name) for name in cols ] - print(json.dumps({"prefix": cfg.prefix, "collections": out}, ensure_ascii=False, indent=2)) + result = [] + for name in cols: + info = client.get_collection(collection_name=name, with_payload_schema=True) + d = info.model_dump() if hasattr(info, "model_dump") else (info.dict() if hasattr(info, "dict") else info) + payload_schema = (d.get("result") or {}).get("payload_schema") + vectors = (d.get("result") or {}).get("vectors") + if isinstance(vectors, dict) and "config" in vectors: + vectors = vectors.get("config") + if args.raw: + result.append({"collection": name, "raw": d}) + else: + result.append({ + "name": name, + "vectors": vectors, + "payload_schema": compact_schema(payload_schema), + "segments_count": (d.get("result") or {}).get("segments_count"), + }) + print(json.dumps({"prefix": cfg.prefix, "collections": result}, ensure_ascii=False, indent=2)) if __name__ == "__main__": main() diff --git a/tests/ensure_indexes_and_show.py b/tests/ensure_indexes_and_show.py index 3b9aa8e..a8183d5 100644 --- a/tests/ensure_indexes_and_show.py +++ b/tests/ensure_indexes_and_show.py @@ -1,11 +1,9 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -scripts/ensure_indexes_and_show.py -Erzwingt/prüft die Index-Erstellung via ensure_payload_indexes(...) und zeigt danach das payload_schema. - -Aufruf: - python3 -m scripts.ensure_indexes_and_show +tests/ensure_indexes_and_show.py (v1.0) +Alias für die Test-Suite: ruft ensure_payload_indexes(...) auf +und zeigt danach das payload_schema (mit with_payload_schema=True). """ from __future__ import annotations import json @@ -15,14 +13,12 @@ def main(): cfg = QdrantConfig.from_env() client = get_client(cfg) ensure_payload_indexes(client, cfg.prefix) - # Danach anzeigen notes, chunks, edges = collection_names(cfg.prefix) - cols = [notes, chunks, edges] res = {} - for name in cols: - info = client.get_collection(collection_name=name) - d = info.dict() if hasattr(info, "dict") else info - res[name] = d.get("result", {}).get("payload_schema") + for name in (notes, chunks, edges): + info = client.get_collection(collection_name=name, with_payload_schema=True) + d = info.model_dump() if hasattr(info, "model_dump") else (info.dict() if hasattr(info, "dict") else info) + res[name] = (d.get("result") or {}).get("payload_schema") print(json.dumps(res, ensure_ascii=False, indent=2)) if __name__ == "__main__":