From 5c9f2e8c831dfb51e35767d8f4f70f1e0ef57e4e Mon Sep 17 00:00:00 2001 From: Lars Date: Sun, 16 Nov 2025 17:47:16 +0100 Subject: [PATCH] Dateien nach "scripts" hochladen --- scripts/diag_payload_indexes.py | 122 ++++++++++++++++++++++---------- 1 file changed, 84 insertions(+), 38 deletions(-) diff --git a/scripts/diag_payload_indexes.py b/scripts/diag_payload_indexes.py index 1418e62..1b8fc05 100644 --- a/scripts/diag_payload_indexes.py +++ b/scripts/diag_payload_indexes.py @@ -1,56 +1,102 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -scripts/diag_payload_indexes.py (v1.1) +scripts/diag_payload_indexes.py (compat v1.2) -Zeigt payload_schema (Indizes) je Collection. -WICHTIG: Einige Qdrant-Versionen liefern payload_schema nur, wenn -`with_payload_schema=true` gesetzt wird. Daher setzen wir das Flag explizit. +Zeigt payload_schema je Collection; kompatibel mit älteren qdrant-client Versionen. +Strategie (in dieser Reihenfolge): + 1) openapi_client.collections_api.get_collection(..., with_payload_schema=True) + 2) client.get_collection(...) + 3) HTTP GET /collections/{name}?with_payload_schema=true """ from __future__ import annotations -import argparse, json -from qdrant_client.http import models as rest + +import json +import os +import urllib.request +import urllib.error +from typing import Any, Dict + from app.core.qdrant import QdrantConfig, get_client, collection_names -def compact_schema(ps: dict | None) -> dict: - if not isinstance(ps, dict): - return {} - out = {} - for k, v in ps.items(): - if isinstance(v, dict) and "type" in v: - out[k] = v["type"] - else: - out[k] = v - return out + +def _safe_model_dump(obj: Any) -> Dict[str, Any]: + if hasattr(obj, "model_dump"): + return obj.model_dump() + if hasattr(obj, "dict"): + return obj.dict() + return obj if isinstance(obj, dict) else {} + + +def _cfg_base_url(cfg) -> str: + if getattr(cfg, "url", None): + return cfg.url.rstrip("/") + host = getattr(cfg, "host", None) or os.getenv("QDRANT_HOST") or "127.0.0.1" + port = getattr(cfg, "port", None) or os.getenv("QDRANT_PORT") or "6333" + return f"http://{host}:{port}" + + +def _http_get(url: str, api_key: str | None) -> Dict[str, Any]: + req = urllib.request.Request(url, method="GET") + if api_key: + req.add_header("api-key", api_key) + req.add_header("Authorization", f"Bearer {api_key}") + req.add_header("Accept", "application/json") + with urllib.request.urlopen(req, timeout=30) as resp: + data = resp.read() + return json.loads(data.decode("utf-8")) + + +def get_collection_payload_schema(client, cfg, name: str) -> Dict[str, Any] | None: + # Strategy 1: OpenAPI client with flag + try: + oc = getattr(client, "openapi_client", None) + if oc is not None and hasattr(oc, "collections_api"): + api = oc.collections_api + info = api.get_collection(collection_name=name, with_payload_schema=True) + d = _safe_model_dump(info) + return (d.get("result") or {}).get("payload_schema") + except Exception: + pass + # Strategy 2: wrapper (manche Versionen liefern Schema auch ohne Flag) + try: + info = client.get_collection(collection_name=name) # kein with_payload_schema kwarg! + d = _safe_model_dump(info) + ps = (d.get("result") or {}).get("payload_schema") + if ps is not None: + return ps + except Exception: + pass + # Strategy 3: direkter HTTP Call + try: + base = _cfg_base_url(cfg) + url = f"{base}/collections/{name}?with_payload_schema=true" + raw = _http_get(url, getattr(cfg, "api_key", None)) + return (raw.get("result") or {}).get("payload_schema") + except Exception: + return None + def main(): - ap = argparse.ArgumentParser() - ap.add_argument("--raw", action="store_true") - args = ap.parse_args() - cfg = QdrantConfig.from_env() client = get_client(cfg) notes, chunks, edges = collection_names(cfg.prefix) - cols = [notes, chunks, edges] - result = [] + out = [] for name in cols: - info = client.get_collection(collection_name=name, with_payload_schema=True) - d = info.model_dump() if hasattr(info, "model_dump") else (info.dict() if hasattr(info, "dict") else info) - payload_schema = (d.get("result") or {}).get("payload_schema") - vectors = (d.get("result") or {}).get("vectors") - if isinstance(vectors, dict) and "config" in vectors: - vectors = vectors.get("config") - if args.raw: - result.append({"collection": name, "raw": d}) - else: - result.append({ - "name": name, - "vectors": vectors, - "payload_schema": compact_schema(payload_schema), - "segments_count": (d.get("result") or {}).get("segments_count"), - }) - print(json.dumps({"prefix": cfg.prefix, "collections": result}, ensure_ascii=False, indent=2)) + ps = get_collection_payload_schema(client, cfg, name) + # Vektorinfos (optional) + try: + info = client.get_collection(collection_name=name) + d = _safe_model_dump(info) + vectors = (d.get("result") or {}).get("vectors") + if isinstance(vectors, dict) and "config" in vectors: + vectors = vectors["config"] + except Exception: + vectors = None + out.append({"name": name, "vectors": vectors, "payload_schema": ps}) + print(json.dumps({"prefix": cfg.prefix, "collections": out}, ensure_ascii=False, indent=2)) + if __name__ == "__main__": main()