scripts/backfill_capability_facets.py hinzugefügt
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s
This commit is contained in:
parent
efbb978074
commit
0c047b708f
62
scripts/backfill_capability_facets.py
Normal file
62
scripts/backfill_capability_facets.py
Normal file
|
|
@ -0,0 +1,62 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# backfill_capability_facets.py
|
||||||
|
import os, math
|
||||||
|
from qdrant_client import QdrantClient
|
||||||
|
from qdrant_client.models import Filter, WithPayloadSelector
|
||||||
|
|
||||||
|
# Name of the Qdrant collection to backfill; overridable through the
# EXERCISE_COLLECTION environment variable.
COLL = os.environ.get("EXERCISE_COLLECTION", "exercises")

# Qdrant connection; host and port are read from QDRANT_HOST / QDRANT_PORT
# and fall back to a local instance on the default port.
client = QdrantClient(
    host=os.environ.get("QDRANT_HOST", "localhost"),
    port=int(os.environ.get("QDRANT_PORT", "6333")),
)
|
||||||
|
|
||||||
|
def names_ge(caps, n):
    """Return the sorted capability names whose level is at least ``n``.

    Args:
        caps: Mapping of capability name to level. ``None`` or an empty
            mapping yields an empty list.
        n: Minimum level (inclusive) a capability must reach to be listed.

    Returns:
        Sorted list of the qualifying names. Entries whose level cannot be
        converted with ``int()`` are skipped.
    """
    selected = []
    for name, level in (caps or {}).items():
        try:
            reached = int(level) >= n
        except (TypeError, ValueError, OverflowError):
            # Non-numeric levels (None, "abc", NaN, inf, nested values) are
            # ignored so a single malformed entry does not abort the run.
            continue
        if reached:
            selected.append(name)
    return sorted(selected)
|
||||||
|
|
||||||
|
# Backfill the derived capability facets ("capability_keys",
# "capability_ge1", "capability_ge2", "capability_ge3") on every point of the
# collection, computed from each point's "capabilities" payload entry.

# Empty filter: scroll over every point in the collection.
scroll_filter = Filter(must=[])
offset = None
updated = 0

while True:
    points, offset = client.scroll(
        collection_name=COLL,
        scroll_filter=scroll_filter,
        with_payload=True,
        limit=256,
        offset=offset,
    )

    # set_payload applies ONE payload to all listed points, so points are
    # grouped by the exact facet values they need; each group is then
    # written with a single call.
    pending = {}
    for pt in points:
        p = pt.payload or {}
        caps = p.get("capabilities") or {}

        facets = {
            "capability_keys": sorted(k for k in caps if k),
            "capability_ge1": names_ge(caps, 1),
            "capability_ge2": names_ge(caps, 2),
            "capability_ge3": names_ge(caps, 3),
        }

        # Only write when a facet is missing or differs from the stored one.
        if all(p.get(field) == value for field, value in facets.items()):
            continue

        signature = tuple(tuple(value) for value in facets.values())
        pending.setdefault(signature, (facets, []))[1].append(pt.id)

    changed = 0
    for facets, ids in pending.values():
        client.set_payload(collection_name=COLL, payload=facets, points=ids)
        changed += len(ids)

    if changed:
        updated += changed
        print(f"[Backfill] updated {changed} points…")

    # scroll returns a None offset after the last page; passing None again
    # would restart from the first page, so stop here.
    if offset is None:
        break

print(f"[Backfill] done. total={updated}")
|
||||||
|
|
||||||
Loading…
Reference in New Issue
Block a user