Dateien nach "tests" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s

This commit is contained in:
Lars 2025-11-16 17:36:09 +01:00
parent 2e7c497b69
commit eafa0fad85
2 changed files with 36 additions and 55 deletions

View File

@@ -1,56 +1,31 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
scripts/diag_payload_indexes.py scripts/diag_payload_indexes.py (v1.1)
Listet die Payload-Indizes (payload_schema) für alle Mindnet-Collections.
Nutzt client.get_collection(name) und zeigt: Zeigt payload_schema (Indizes) je Collection.
- payload_schema (Feld -> Schema/Typ) WICHTIG: Einige Qdrant-Versionen liefern payload_schema nur, wenn
- Vector-Konfiguration (size, distance, named vectors) `with_payload_schema=true` gesetzt wird. Daher setzen wir das Flag explizit.
- Segment-Infos (optional komprimiert)
Aufruf:
python3 -m scripts.diag_payload_indexes
python3 -m scripts.diag_payload_indexes --raw # vollständiges JSON
Hinweis: In Qdrant werden die Payload-Indizes im Collection-Info-Endpunkt unter
`result.payload_schema` geliefert.
""" """
from __future__ import annotations from __future__ import annotations
import argparse, json import argparse, json
from qdrant_client.http import models as rest from qdrant_client.http import models as rest
from app.core.qdrant import QdrantConfig, get_client, collection_names from app.core.qdrant import QdrantConfig, get_client, collection_names
def compact_schema(ps: dict) -> dict: def compact_schema(ps: dict | None) -> dict:
if not isinstance(ps, dict):
return {}
out = {} out = {}
for k, v in (ps or {}).items(): for k, v in ps.items():
# v ist z.B. {"type":"keyword"} oder {"type":"text"} etc. if isinstance(v, dict) and "type" in v:
if isinstance(v, dict): out[k] = v["type"]
out[k] = v.get("type") or v
else: else:
out[k] = v out[k] = v
return out return out
def get_info(client, name: str) -> dict:
info = client.get_collection(collection_name=name)
# Das SDK liefert .dict(). Wir extrahieren das Wesentliche.
d = info.dict() if hasattr(info, "dict") else info
result = d.get("result", {})
vectors = result.get("vectors") or {}
if isinstance(vectors, dict) and "config" in vectors:
vectors = vectors.get("config")
payload_schema = result.get("payload_schema") or {}
segments = result.get("segments_count")
return {
"name": name,
"vectors": vectors,
"payload_schema": compact_schema(payload_schema),
"segments_count": segments,
}
def main(): def main():
ap = argparse.ArgumentParser() ap = argparse.ArgumentParser()
ap.add_argument("--raw", action="store_true", help="Gibt das rohe get_collection-Result zurück") ap.add_argument("--raw", action="store_true")
args = ap.parse_args() args = ap.parse_args()
cfg = QdrantConfig.from_env() cfg = QdrantConfig.from_env()
@@ -58,14 +33,24 @@ def main():
notes, chunks, edges = collection_names(cfg.prefix) notes, chunks, edges = collection_names(cfg.prefix)
cols = [notes, chunks, edges] cols = [notes, chunks, edges]
if args.raw: result = []
for name in cols: for name in cols:
info = client.get_collection(collection_name=name) info = client.get_collection(collection_name=name, with_payload_schema=True)
d = info.dict() if hasattr(info, "dict") else info d = info.model_dump() if hasattr(info, "model_dump") else (info.dict() if hasattr(info, "dict") else info)
print(json.dumps({"collection": name, "raw": d}, ensure_ascii=False)) payload_schema = (d.get("result") or {}).get("payload_schema")
else: vectors = (d.get("result") or {}).get("vectors")
out = [ get_info(client, name) for name in cols ] if isinstance(vectors, dict) and "config" in vectors:
print(json.dumps({"prefix": cfg.prefix, "collections": out}, ensure_ascii=False, indent=2)) vectors = vectors.get("config")
if args.raw:
result.append({"collection": name, "raw": d})
else:
result.append({
"name": name,
"vectors": vectors,
"payload_schema": compact_schema(payload_schema),
"segments_count": (d.get("result") or {}).get("segments_count"),
})
print(json.dumps({"prefix": cfg.prefix, "collections": result}, ensure_ascii=False, indent=2))
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -1,11 +1,9 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
scripts/ensure_indexes_and_show.py tests/ensure_indexes_and_show.py (v1.0)
Erzwingt/prüft die Index-Erstellung via ensure_payload_indexes(...) und zeigt danach das payload_schema. Alias für die Test-Suite: ruft ensure_payload_indexes(...) auf
und zeigt danach das payload_schema (mit with_payload_schema=True).
Aufruf:
python3 -m scripts.ensure_indexes_and_show
""" """
from __future__ import annotations from __future__ import annotations
import json import json
@@ -15,14 +13,12 @@ def main():
cfg = QdrantConfig.from_env() cfg = QdrantConfig.from_env()
client = get_client(cfg) client = get_client(cfg)
ensure_payload_indexes(client, cfg.prefix) ensure_payload_indexes(client, cfg.prefix)
# Danach anzeigen
notes, chunks, edges = collection_names(cfg.prefix) notes, chunks, edges = collection_names(cfg.prefix)
cols = [notes, chunks, edges]
res = {} res = {}
for name in cols: for name in (notes, chunks, edges):
info = client.get_collection(collection_name=name) info = client.get_collection(collection_name=name, with_payload_schema=True)
d = info.dict() if hasattr(info, "dict") else info d = info.model_dump() if hasattr(info, "model_dump") else (info.dict() if hasattr(info, "dict") else info)
res[name] = d.get("result", {}).get("payload_schema") res[name] = (d.get("result") or {}).get("payload_schema")
print(json.dumps(res, ensure_ascii=False, indent=2)) print(json.dumps(res, ensure_ascii=False, indent=2))
if __name__ == "__main__": if __name__ == "__main__":