mindnet/scripts/diag_payload_indexes.py
Lars e93bab6ea7
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
Fassadenauflösung unter app/core
2025-12-28 11:04:40 +01:00

146 lines
4.5 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
FILE: scripts/diag_payload_indexes.py
VERSION: 2.1.0 (2025-12-15)
STATUS: Active
COMPATIBILITY: v2.9.1 (Post-WP14/WP-15b)
Zweck:
-------
Zeigt das Payload-Schema (Indizes) für alle Collections eines Präfixes.
Nützlich zur Diagnose von Index-Problemen oder Validierung der Schema-Struktur.
Funktionsweise:
---------------
1. Ermittelt Collections für das Präfix (notes, chunks, edges)
2. Für jede Collection:
- Versucht Payload-Schema über verschiedene Methoden zu laden:
* qdrant-client API (get_collection)
* HTTP GET direkt (Fallback)
- Zeigt Schema als JSON
3. Gibt Schema-Übersicht aus
Ergebnis-Interpretation:
------------------------
- Ausgabe: JSON pro Collection mit payload_schema
- Zeigt vorhandene Indizes (keyword, text) und deren Konfiguration
- Exit-Code 0: Erfolgreich
Verwendung:
-----------
- Diagnose von Index-Problemen
- Validierung nach ensure_payload_indexes
- Dokumentation der Schema-Struktur
Hinweise:
---------
- Kompatibel mit verschiedenen qdrant-client Versionen
- Nutzt Fallback-Strategie für maximale Kompatibilität
Aufruf:
-------
python3 -m scripts.diag_payload_indexes --prefix mindnet
Parameter:
----------
--prefix TEXT Collection-Präfix (Default: ENV COLLECTION_PREFIX oder mindnet)
Änderungen:
-----------
v2.1.0 (2025-12-15): Dokumentation aktualisiert
v1.2.0: Kompatibilität mit verschiedenen qdrant-client Versionen
v1.0.0: Initial Release
"""
from __future__ import annotations
import json
import os
import urllib.request
import urllib.error
from typing import Any, Dict
from app.core.database.qdrant import QdrantConfig, get_client, collection_names
def _safe_model_dump(obj: Any) -> Dict[str, Any]:
    """Turn a client response object into a plain dictionary.

    The first available of ``obj.model_dump()`` and ``obj.dict()`` is called
    and its result returned (presumably the pydantic v2 / v1 spellings used
    by different qdrant-client releases -- confirm against the client).
    An object offering neither method is returned unchanged when it already
    is a ``dict``; anything else yields an empty dict.
    """
    for dump_method in ("model_dump", "dict"):
        if hasattr(obj, dump_method):
            return getattr(obj, dump_method)()
    if isinstance(obj, dict):
        return obj
    return {}
def _cfg_base_url(cfg) -> str:
    """Build the base HTTP URL of the Qdrant server described by ``cfg``.

    A truthy ``cfg.url`` wins and is returned with trailing slashes removed.
    Otherwise the URL is assembled as ``http://<host>:<port>``; each part is
    taken from the config attribute, then from the ``QDRANT_HOST`` /
    ``QDRANT_PORT`` environment variable, then from the defaults
    ``127.0.0.1`` and ``6333``.
    """
    explicit_url = getattr(cfg, "url", None)
    if explicit_url:
        return explicit_url.rstrip("/")

    def _resolve(attribute: str, env_var: str, fallback: str):
        # First truthy value wins: config attribute, environment, default.
        return getattr(cfg, attribute, None) or os.getenv(env_var) or fallback

    host = _resolve("host", "QDRANT_HOST", "127.0.0.1")
    port = _resolve("port", "QDRANT_PORT", "6333")
    return "http://{}:{}".format(host, port)
def _http_get(url: str, api_key: str | None) -> Dict[str, Any]:
    """Fetch ``url`` with a GET request and return the decoded JSON body.

    When ``api_key`` is truthy it is sent twice: as an ``api-key`` header and
    as an ``Authorization: Bearer`` header. ``Accept: application/json`` is
    always sent. The request uses a 30 second timeout and the body is decoded
    as UTF-8 before JSON parsing. Errors raised by ``urllib`` or ``json``
    propagate to the caller.
    """
    header_pairs = []
    if api_key:
        header_pairs.append(("api-key", api_key))
        header_pairs.append(("Authorization", f"Bearer {api_key}"))
    header_pairs.append(("Accept", "application/json"))

    request = urllib.request.Request(url, method="GET")
    for header_name, header_value in header_pairs:
        request.add_header(header_name, header_value)

    with urllib.request.urlopen(request, timeout=30) as response:
        body_text = response.read().decode("utf-8")
    return json.loads(body_text)
def _extract_payload_schema(dump: Dict[str, Any]) -> Dict[str, Any] | None:
    """Return the ``payload_schema`` entry of a dumped collection response.

    The ``"result"`` envelope is inspected first (shape of a raw REST or
    OpenAPI response), then the top level (shape of a dumped collection-info
    object without envelope). ``None`` means neither place holds a schema.
    """
    for container in (dump.get("result"), dump):
        if isinstance(container, dict):
            schema = container.get("payload_schema")
            if schema is not None:
                return schema
    return None


def get_collection_payload_schema(client, cfg, name: str) -> Dict[str, Any] | None:
    """Load the payload schema (index definitions) of collection ``name``.

    Three strategies are tried in order; the first one that yields a schema
    that is not ``None`` wins:

    1. the low-level OpenAPI client with ``with_payload_schema=True``,
    2. the high-level ``client.get_collection`` wrapper,
    3. a direct HTTP GET against the server derived from ``cfg``.

    Args:
        client: Qdrant client object; only ``openapi_client`` and
            ``get_collection`` are accessed here.
        cfg: Configuration object; read for the base URL and ``api_key``.
        name: Name of the collection to inspect.

    Returns:
        The payload schema, or ``None`` when no strategy produced one.
        Failures inside a strategy are swallowed on purpose so the next
        strategy gets its chance; this function itself does not raise.
    """
    # Strategy 1: OpenAPI client with explicit flag. Client versions whose
    # method rejects the keyword raise here and fall through to the next one.
    try:
        oc = getattr(client, "openapi_client", None)
        if oc is not None and hasattr(oc, "collections_api"):
            info = oc.collections_api.get_collection(
                collection_name=name, with_payload_schema=True
            )
            schema = _extract_payload_schema(_safe_model_dump(info))
            # Only a real schema ends the search; a missing one must not
            # short-circuit the remaining fallbacks.
            if schema is not None:
                return schema
    except Exception:
        pass

    # Strategy 2: high-level wrapper (some versions deliver the schema
    # without a flag). Deliberately no with_payload_schema keyword here.
    try:
        info = client.get_collection(collection_name=name)
        schema = _extract_payload_schema(_safe_model_dump(info))
        if schema is not None:
            return schema
    except Exception:
        pass

    # Strategy 3: direct HTTP call as the last resort.
    try:
        base = _cfg_base_url(cfg)
        url = f"{base}/collections/{name}?with_payload_schema=true"
        raw = _http_get(url, getattr(cfg, "api_key", None))
        return _extract_payload_schema(raw)
    except Exception:
        return None
def main():
    """Print payload schema and vector config of all prefix collections as JSON.

    The collection prefix comes from the ``--prefix`` command-line option
    when given, otherwise from the environment-derived ``QdrantConfig``.
    For each of the three collections returned by ``collection_names`` the
    payload schema and, on a best-effort basis, the vector configuration are
    collected. The result is written to stdout as one JSON document with the
    keys ``prefix`` and ``collections``.
    """
    import argparse  # function-scope import: the module header lacks argparse

    parser = argparse.ArgumentParser(
        description="Show the payload schema (indexes) of all collections of a prefix."
    )
    parser.add_argument(
        "--prefix",
        default=None,
        help="Collection prefix (default: prefix of the environment-derived config)",
    )
    # parse_known_args keeps the earlier tolerance for unrelated arguments,
    # which were silently ignored before argument parsing existed.
    args, _ignored = parser.parse_known_args()

    cfg = QdrantConfig.from_env()
    client = get_client(cfg)
    prefix = args.prefix or cfg.prefix
    notes, chunks, edges = collection_names(prefix)

    out = []
    for name in (notes, chunks, edges):
        ps = get_collection_payload_schema(client, cfg, name)

        # Vector info is optional: any failure degrades to None.
        try:
            dump = _safe_model_dump(client.get_collection(collection_name=name))
            # Accept both an enveloped response and a flat info object.
            body = dump.get("result") or dump
            vectors = body.get("vectors")
            if isinstance(vectors, dict) and "config" in vectors:
                vectors = vectors["config"]
            if vectors is None:
                # Documented location of the vector parameters.
                params = (body.get("config") or {}).get("params") or {}
                vectors = params.get("vectors")
        except Exception:
            vectors = None

        out.append({"name": name, "vectors": vectors, "payload_schema": ps})

    print(json.dumps({"prefix": prefix, "collections": out}, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()