def _as_plain_dict(info):
    """Return *info* as a plain ``dict``.

    Pydantic models are converted with ``model_dump()`` (pydantic v2) or
    ``dict()`` (pydantic v1); anything else, e.g. an already decoded JSON
    mapping, is returned unchanged.
    """
    for method_name in ("model_dump", "dict"):
        method = getattr(info, method_name, None)
        if callable(method):
            return method()
    return info


def _unwrap_result(data):
    """Return the collection-info mapping, with or without a REST envelope.

    A raw REST response wraps the info as ``{"result": {...}}``; the mapping
    produced from an SDK model has no such wrapper. Both shapes are accepted;
    non-mapping input yields an empty dict.
    """
    if not isinstance(data, dict):
        return {}
    inner = data.get("result")
    return inner if isinstance(inner, dict) else data


def compact_schema(ps: dict) -> dict:
    """Reduce a ``payload_schema`` mapping to ``field -> type name``.

    Args:
        ps: Mapping of payload field name to its index description, e.g.
            ``{"data_type": "keyword", "points": 3}`` or ``{"type": "text"}``.
            ``None`` or an empty mapping is accepted.

    Returns:
        A new dict. For dict-valued entries the value is the ``data_type``
        (or, as a fallback, ``type``) of the entry; if neither key is set the
        entry is kept unchanged. Non-dict entries are passed through as-is.
    """
    out = {}
    for field, spec in (ps or {}).items():
        if not isinstance(spec, dict):
            out[field] = spec
            continue
        kind = spec.get("data_type") or spec.get("type")
        # Enum members expose their plain value via ``.value``; plain strings
        # have no such attribute and are used directly.
        out[field] = getattr(kind, "value", kind) if kind else spec
    return out


def get_info(client, name: str) -> dict:
    """Collect the essential collection info for *name*.

    Args:
        client: Object offering ``get_collection(collection_name=...)``.
        name: Name of the collection to inspect.

    Returns:
        Dict with the keys ``name``, ``vectors``, ``payload_schema`` (compacted
        via :func:`compact_schema`) and ``segments_count``.
    """
    info = client.get_collection(collection_name=name)
    result = _unwrap_result(_as_plain_dict(info))

    # Backward-compatible lookup: a top-level "vectors" entry, optionally
    # nested under "config".
    vectors = result.get("vectors") or {}
    if isinstance(vectors, dict) and "config" in vectors:
        vectors = vectors.get("config")
    if not vectors:
        # Collection info carries the vector parameters under
        # config -> params -> vectors.
        config = result.get("config")
        params = config.get("params") if isinstance(config, dict) else None
        vectors = (params.get("vectors") if isinstance(params, dict) else None) or {}

    return {
        "name": name,
        "vectors": vectors,
        "payload_schema": compact_schema(result.get("payload_schema") or {}),
        "segments_count": result.get("segments_count"),
    }


def main() -> None:
    """Print payload-index information for the notes, chunks and edges collections.

    With ``--raw`` one JSON line per collection containing the complete
    ``get_collection`` result is printed; otherwise a single indented JSON
    document with the compact summary of every collection.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--raw", action="store_true", help="Gibt das rohe get_collection-Result zurück")
    args = ap.parse_args()

    cfg = QdrantConfig.from_env()
    client = get_client(cfg)
    notes, chunks, edges = collection_names(cfg.prefix)
    cols = [notes, chunks, edges]

    if args.raw:
        for name in cols:
            info = client.get_collection(collection_name=name)
            print(json.dumps({"collection": name, "raw": _as_plain_dict(info)}, ensure_ascii=False))
        return

    out = [get_info(client, name) for name in cols]
    print(json.dumps({"prefix": cfg.prefix, "collections": out}, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()
def _payload_schema_of(info):
    """Extract ``payload_schema`` from a ``get_collection`` result.

    *info* may be a pydantic model (converted with ``model_dump()`` on
    pydantic v2 or ``dict()`` on v1) or an already decoded mapping. A REST
    style ``{"result": {...}}`` envelope is unwrapped when present; without
    it the mapping itself is searched. Returns ``None`` if no schema is found.
    """
    data = info
    for method_name in ("model_dump", "dict"):
        method = getattr(info, method_name, None)
        if callable(method):
            data = method()
            break
    if not isinstance(data, dict):
        return None
    inner = data.get("result")
    if isinstance(inner, dict):
        data = inner
    return data.get("payload_schema")


def main() -> None:
    """Ensure the payload indexes exist, then print each collection's payload_schema.

    Calls ``ensure_payload_indexes`` for the configured prefix and afterwards
    prints one indented JSON object mapping collection name to its
    ``payload_schema`` for the notes, chunks and edges collections.
    """
    cfg = QdrantConfig.from_env()
    client = get_client(cfg)
    ensure_payload_indexes(client, cfg.prefix)

    # Show the resulting schema afterwards.
    notes, chunks, edges = collection_names(cfg.prefix)
    res = {}
    for name in (notes, chunks, edges):
        info = client.get_collection(collection_name=name)
        res[name] = _payload_schema_of(info)
    print(json.dumps(res, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()