scripts/backfill_capability_facets.py hinzugefügt
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s

This commit is contained in:
Lars 2025-08-11 19:19:32 +02:00
parent efbb978074
commit 0c047b708f

View File

@ -0,0 +1,62 @@
#!/usr/bin/env python3
# backfill_capability_facets.py
import os, math
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, WithPayloadSelector
# Name of the Qdrant collection to backfill; overridable via EXERCISE_COLLECTION.
COLL = os.environ.get("EXERCISE_COLLECTION", "exercises")

# Connection target is read from the environment, defaulting to localhost:6333.
client = QdrantClient(
    host=os.environ.get("QDRANT_HOST", "localhost"),
    port=int(os.environ.get("QDRANT_PORT", "6333")),
)
def names_ge(caps, n):
    """Return the sorted keys of *caps* whose value, as an int, is >= *n*.

    Args:
        caps: Mapping of capability name to level, or ``None`` / empty.
        n: Minimum level a capability must reach to be included.

    Returns:
        Sorted list of the qualifying keys. Entries whose value cannot be
        converted with ``int()`` are skipped.
    """
    out = []
    for k, v in (caps or {}).items():
        try:
            level = int(v)
        except (TypeError, ValueError, OverflowError):
            # Best-effort backfill: a level that is None, non-numeric text,
            # NaN or infinite is ignored rather than aborting the whole run.
            continue
        if level >= n:
            out.append(k)
    return sorted(out)
# Backfill the derived capability facets on every point of the collection:
#   capability_keys - sorted non-empty keys of payload["capabilities"]
#   capability_geN  - sorted keys whose level is >= N (N = 1, 2, 3)
scroll_filter = Filter(must=[])  # empty filter: match everything
offset = None
updated = 0
while True:
    points, offset = client.scroll(
        collection_name=COLL,
        scroll_filter=scroll_filter,
        with_payload=True,  # request the full payload of each point
        limit=256,
        offset=offset,
    )
    if not points:
        break

    # set_payload() writes ONE payload to all listed points, so points are
    # grouped by identical facet values and each group is written separately.
    pending = {}
    for pt in points:
        p = pt.payload or {}
        caps = p.get("capabilities") or {}
        facets = {
            "capability_keys": sorted(k for k in caps.keys() if k),
            "capability_ge1": names_ge(caps, 1),
            "capability_ge2": names_ge(caps, 2),
            "capability_ge3": names_ge(caps, 3),
        }
        # Only write when a facet is missing or differs from the stored value.
        if all(p.get(name) == value for name, value in facets.items()):
            continue
        group_key = tuple(tuple(value) for value in facets.values())
        pending.setdefault(group_key, (facets, []))[1].append(pt.id)

    changed = 0
    for facets, ids in pending.values():
        client.set_payload(collection_name=COLL, payload=facets, points=ids)
        changed += len(ids)
    if changed:
        updated += changed
        print(f"[Backfill] updated {changed} points…")

    # A None offset marks the last page; scrolling again with offset=None
    # would restart from the first page and loop forever.
    if offset is None:
        break
print(f"[Backfill] done. total={updated}")