Dateien nach "tests" hochladen

2025-11-11 17:49:10 +01:00 · 2025-11-11 17:49:10 +01:00 · 487e28a904
commit 487e28a904
parent c492f97a67
2 changed files with 100 additions and 0 deletions
--- a/tests/diag_payload_indexes.py
+++ b/tests/diag_payload_indexes.py
@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+tests/diag_payload_indexes.py
+Lists the payload indexes (payload_schema) of all Mindnet collections.
+
+Uses client.get_collection(name) and shows:
+- payload_schema (field -> schema/type)
+- vector configuration (size, distance, named vectors)
+- segment info (compact form: segments_count)
+
+Usage:
+  python3 -m tests.diag_payload_indexes
+  python3 -m tests.diag_payload_indexes --raw   # full JSON
+
+Note: Qdrant's collection-info endpoint reports the payload indexes under
+`result.payload_schema`.
+"""
+from __future__ import annotations
+import argparse, json
+from qdrant_client.http import models as rest
+from app.core.qdrant import QdrantConfig, get_client, collection_names
+
+def compact_schema(ps: dict) -> dict:
+    """Reduce a payload schema to a flat ``field -> type`` mapping.
+
+    Args:
+        ps: Mapping of payload field name to its index description. ``None``
+            or an empty mapping yields an empty result.
+
+    Returns:
+        A new dict. For dict-valued entries the value is the entry's
+        ``"type"`` or, failing that, its ``"data_type"``; when neither key
+        holds a truthy value the entry is kept unchanged. Non-dict entries
+        are kept unchanged.
+    """
+    out = {}
+    for field, spec in (ps or {}).items():
+        if isinstance(spec, dict):
+            # Qdrant describes an index as {"data_type": "keyword", ...};
+            # "type" is checked first so inputs that worked before still
+            # produce the same output.
+            out[field] = spec.get("type") or spec.get("data_type") or spec
+        else:
+            out[field] = spec
+    return out
+
+def get_info(client, name: str) -> dict:
+    """Return a compact summary of one collection.
+
+    Args:
+        client: Object exposing ``get_collection(collection_name=...)``.
+        name: Name of the collection to inspect.
+
+    Returns:
+        A dict with the keys ``name``, ``vectors``, ``payload_schema``
+        (passed through ``compact_schema``) and ``segments_count``.
+    """
+    info = client.get_collection(collection_name=name)
+    # Model objects are converted to a plain dict; plain dicts pass through.
+    d = info.dict() if hasattr(info, "dict") else info
+
+    # A raw REST response nests the data under "result"; the client model
+    # carries the same fields at the top level. Accept both shapes, otherwise
+    # every field below would silently come back empty.
+    nested = d.get("result")
+    result = nested if isinstance(nested, dict) else d
+
+    vectors = result.get("vectors")
+    if not vectors:
+        # Vector parameters may instead be located at config.params.vectors.
+        params = (result.get("config") or {}).get("params") or {}
+        vectors = params.get("vectors") or {}
+    elif isinstance(vectors, dict) and "config" in vectors:
+        vectors = vectors.get("config")
+
+    payload_schema = result.get("payload_schema") or {}
+    segments = result.get("segments_count")
+    return {
+        "name": name,
+        "vectors": vectors,
+        "payload_schema": compact_schema(payload_schema),
+        "segments_count": segments,
+    }
+
+def main():
+    """Print collection diagnostics as JSON on stdout.
+
+    With ``--raw`` one JSON line per collection is printed, containing the
+    unmodified ``get_collection`` result. Without it, a single indented JSON
+    document with the prefix and one ``get_info`` summary per collection is
+    printed.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--raw", action="store_true", help="Gibt das rohe get_collection-Result zurück")
+    opts = parser.parse_args()
+
+    cfg = QdrantConfig.from_env()
+    client = get_client(cfg)
+    notes_col, chunks_col, edges_col = collection_names(cfg.prefix)
+    targets = (notes_col, chunks_col, edges_col)
+
+    if not opts.raw:
+        # Summary mode: gather all summaries first, then emit one document.
+        summaries = []
+        for col in targets:
+            summaries.append(get_info(client, col))
+        report = {"prefix": cfg.prefix, "collections": summaries}
+        print(json.dumps(report, ensure_ascii=False, indent=2))
+        return
+
+    # Raw mode: one compact JSON line per collection.
+    for col in targets:
+        info = client.get_collection(collection_name=col)
+        if hasattr(info, "dict"):
+            raw = info.dict()
+        else:
+            raw = info
+        print(json.dumps({"collection": col, "raw": raw}, ensure_ascii=False))
+
+if __name__ == "__main__":
+    main()
--- a/tests/ensure_indexes_and_show.py
+++ b/tests/ensure_indexes_and_show.py
@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+tests/ensure_indexes_and_show.py
+Forces/verifies index creation via ensure_payload_indexes(...) and then shows the payload_schema.
+
+Usage:
+  python3 -m tests.ensure_indexes_and_show
+"""
+from __future__ import annotations
+import json
+from app.core.qdrant import QdrantConfig, get_client, ensure_payload_indexes, collection_names
+
+def main():
+    """Ensure the payload indexes exist, then print each collection's schema.
+
+    Calls ``ensure_payload_indexes`` for the configured prefix and prints a
+    JSON object mapping each collection name to its ``payload_schema``.
+    """
+    cfg = QdrantConfig.from_env()
+    client = get_client(cfg)
+    ensure_payload_indexes(client, cfg.prefix)
+    # Show the resulting schema afterwards.
+    notes, chunks, edges = collection_names(cfg.prefix)
+    cols = [notes, chunks, edges]
+    res = {}
+    for name in cols:
+        info = client.get_collection(collection_name=name)
+        d = info.dict() if hasattr(info, "dict") else info
+        # A raw REST response nests the data under "result"; the client model
+        # exposes payload_schema at the top level. Accept both shapes so the
+        # schema is not reported as null when there is no envelope.
+        nested = d.get("result")
+        result = nested if isinstance(nested, dict) else d
+        res[name] = result.get("payload_schema")
+    print(json.dumps(res, ensure_ascii=False, indent=2))
+
+if __name__ == "__main__":
+    main()