#!/usr/bin/env python3
# backfill_capability_facets.py
"""Backfill derived capability facet fields on the points of a Qdrant collection.

For every point, the ``capabilities`` payload mapping (name -> int-like value)
is expanded into four list-valued payload fields:

* ``capability_keys`` - sorted, non-empty capability names
* ``capability_ge1`` / ``capability_ge2`` / ``capability_ge3`` - sorted names
  whose value, converted with ``int()``, is at least 1 / 2 / 3

Points whose stored facet fields already equal the computed ones are skipped.

Environment variables:
    EXERCISE_COLLECTION: collection name (default ``exercises``)
    QDRANT_HOST: Qdrant host (default ``localhost``)
    QDRANT_PORT: Qdrant port (default ``6333``)
"""
import os

COLL = os.getenv("EXERCISE_COLLECTION", "exercises")

# Number of points requested per scroll page.
PAGE_SIZE = 256

# Thresholds n for which a ``capability_ge<n>`` field is written.
THRESHOLDS = (1, 2, 3)


def names_ge(caps, n):
    """Return the sorted keys of *caps* whose value, as an int, is >= *n*.

    Args:
        caps: Mapping of capability name to an int-convertible value;
            ``None`` or an empty mapping yields ``[]``.
        n: Inclusive lower bound.

    Returns:
        Sorted list of qualifying keys. Entries whose value cannot be
        converted with ``int()`` are skipped (best effort).
    """
    out = []
    for name, value in (caps or {}).items():
        try:
            level = int(value)
        except (TypeError, ValueError, OverflowError):
            # Non-numeric, None, NaN or infinite value: ignore this entry.
            continue
        if level >= n:
            out.append(name)
    return sorted(out)


def compute_facets(caps):
    """Build the facet payload fields for one ``capabilities`` value.

    Args:
        caps: The point's ``capabilities`` payload value. Anything that is
            not a dict (including ``None``) is treated as "no capabilities"
            so a single malformed point cannot abort the whole run.

    Returns:
        Dict with the keys ``capability_keys``, ``capability_ge1``,
        ``capability_ge2`` and ``capability_ge3``, each a sorted list.
    """
    if not isinstance(caps, dict):
        caps = {}
    facets = {"capability_keys": sorted(k for k in caps if k)}
    for n in THRESHOLDS:
        facets[f"capability_ge{n}"] = names_ge(caps, n)
    return facets


def _pending_updates(points):
    """Return ``(point_id, facets)`` for points whose stored facets are missing or differ."""
    updates = []
    for pt in points:
        payload = pt.payload or {}
        facets = compute_facets(payload.get("capabilities"))
        # Only write when a field is missing or differs from the computed value.
        if any(payload.get(field) != value for field, value in facets.items()):
            updates.append((pt.id, facets))
    return updates


def _group_by_facets(updates):
    """Group point ids by identical facet payload, preserving first-seen order.

    ``set_payload`` applies ONE payload to all points it is given, so points
    may only share a call when their facets are identical.
    """
    groups = {}
    for point_id, facets in updates:
        key = tuple(tuple(values) for values in facets.values())
        groups.setdefault(key, (facets, []))[1].append(point_id)
    return list(groups.values())


def backfill(client, collection=COLL, page_size=PAGE_SIZE):
    """Scroll through *collection* and write missing or outdated facet fields.

    Args:
        client: Object exposing ``scroll(...)`` and ``set_payload(...)`` with
            the keyword arguments used below (e.g. a ``QdrantClient``).
        collection: Name of the collection to process.
        page_size: Number of points requested per scroll page.

    Returns:
        Total number of points whose payload was updated.
    """
    updated = 0
    offset = None
    while True:
        points, offset = client.scroll(
            collection_name=collection,
            with_payload=True,
            limit=page_size,
            offset=offset,
        )
        updates = _pending_updates(points)
        for facets, point_ids in _group_by_facets(updates):
            client.set_payload(
                collection_name=collection,
                payload=facets,
                points=point_ids,
            )
        if updates:
            updated += len(updates)
            print(f"[Backfill] updated {len(updates)} points…")
        # A None offset marks the last page. Scrolling again with offset=None
        # would start over at the first page and never terminate.
        if offset is None:
            break
    return updated


def main():
    """Connect to Qdrant using the environment settings and run the backfill."""
    # Imported lazily so the pure helpers above stay importable (and testable)
    # without the client library installed.
    from qdrant_client import QdrantClient

    client = QdrantClient(
        host=os.getenv("QDRANT_HOST", "localhost"),
        port=int(os.getenv("QDRANT_PORT", "6333")),
    )
    total = backfill(client)
    print(f"[Backfill] done. total={total}")


if __name__ == "__main__":
    main()