#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ FILE: scripts/diag_payload_indexes.py VERSION: 2.1.0 (2025-12-15) STATUS: Active COMPATIBILITY: v2.9.1 (Post-WP14/WP-15b) Zweck: ------- Zeigt das Payload-Schema (Indizes) für alle Collections eines Präfixes. Nützlich zur Diagnose von Index-Problemen oder Validierung der Schema-Struktur. Funktionsweise: --------------- 1. Ermittelt Collections für das Präfix (notes, chunks, edges) 2. Für jede Collection: - Versucht Payload-Schema über verschiedene Methoden zu laden: * qdrant-client API (get_collection) * HTTP GET direkt (Fallback) - Zeigt Schema als JSON 3. Gibt Schema-Übersicht aus Ergebnis-Interpretation: ------------------------ - Ausgabe: JSON pro Collection mit payload_schema - Zeigt vorhandene Indizes (keyword, text) und deren Konfiguration - Exit-Code 0: Erfolgreich Verwendung: ----------- - Diagnose von Index-Problemen - Validierung nach ensure_payload_indexes - Dokumentation der Schema-Struktur Hinweise: --------- - Kompatibel mit verschiedenen qdrant-client Versionen - Nutzt Fallback-Strategie für maximale Kompatibilität Aufruf: ------- python3 -m scripts.diag_payload_indexes --prefix mindnet Parameter: ---------- --prefix TEXT Collection-Präfix (Default: ENV COLLECTION_PREFIX oder mindnet) Änderungen: ----------- v2.1.0 (2025-12-15): Dokumentation aktualisiert v1.2.0: Kompatibilität mit verschiedenen qdrant-client Versionen v1.0.0: Initial Release """ from __future__ import annotations import json import os import urllib.request import urllib.error from typing import Any, Dict from app.core.qdrant import QdrantConfig, get_client, collection_names def _safe_model_dump(obj: Any) -> Dict[str, Any]: if hasattr(obj, "model_dump"): return obj.model_dump() if hasattr(obj, "dict"): return obj.dict() return obj if isinstance(obj, dict) else {} def _cfg_base_url(cfg) -> str: if getattr(cfg, "url", None): return cfg.url.rstrip("/") host = getattr(cfg, "host", None) or os.getenv("QDRANT_HOST") or "127.0.0.1" port = getattr(cfg, "port", None) or os.getenv("QDRANT_PORT") or "6333" return f"http://{host}:{port}" def _http_get(url: str, api_key: str | None) -> Dict[str, Any]: req = urllib.request.Request(url, method="GET") if api_key: req.add_header("api-key", api_key) req.add_header("Authorization", f"Bearer {api_key}") req.add_header("Accept", "application/json") with urllib.request.urlopen(req, timeout=30) as resp: data = resp.read() return json.loads(data.decode("utf-8")) def get_collection_payload_schema(client, cfg, name: str) -> Dict[str, Any] | None: # Strategy 1: OpenAPI client with flag try: oc = getattr(client, "openapi_client", None) if oc is not None and hasattr(oc, "collections_api"): api = oc.collections_api info = api.get_collection(collection_name=name, with_payload_schema=True) d = _safe_model_dump(info) return (d.get("result") or {}).get("payload_schema") except Exception: pass # Strategy 2: wrapper (manche Versionen liefern Schema auch ohne Flag) try: info = client.get_collection(collection_name=name) # kein with_payload_schema kwarg! d = _safe_model_dump(info) ps = (d.get("result") or {}).get("payload_schema") if ps is not None: return ps except Exception: pass # Strategy 3: direkter HTTP Call try: base = _cfg_base_url(cfg) url = f"{base}/collections/{name}?with_payload_schema=true" raw = _http_get(url, getattr(cfg, "api_key", None)) return (raw.get("result") or {}).get("payload_schema") except Exception: return None def main(): cfg = QdrantConfig.from_env() client = get_client(cfg) notes, chunks, edges = collection_names(cfg.prefix) cols = [notes, chunks, edges] out = [] for name in cols: ps = get_collection_payload_schema(client, cfg, name) # Vektorinfos (optional) try: info = client.get_collection(collection_name=name) d = _safe_model_dump(info) vectors = (d.get("result") or {}).get("vectors") if isinstance(vectors, dict) and "config" in vectors: vectors = vectors["config"] except Exception: vectors = None out.append({"name": name, "vectors": vectors, "payload_schema": ps}) print(json.dumps({"prefix": cfg.prefix, "collections": out}, ensure_ascii=False, indent=2)) if __name__ == "__main__": main()