scripts/backfill_capability_facets.py hinzugefügt
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s
This commit is contained in:
parent
efbb978074
commit
0c047b708f
62
scripts/backfill_capability_facets.py
Normal file
62
scripts/backfill_capability_facets.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
#!/usr/bin/env python3
|
||||
# backfill_capability_facets.py
|
||||
import os, math
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import Filter, WithPayloadSelector
|
||||
|
||||
# Target collection and Qdrant endpoint; each can be overridden via the environment.
COLL = os.environ.get("EXERCISE_COLLECTION", "exercises")

_qdrant_host = os.environ.get("QDRANT_HOST", "localhost")
_qdrant_port = int(os.environ.get("QDRANT_PORT", "6333"))
client = QdrantClient(host=_qdrant_host, port=_qdrant_port)
|
||||
|
||||
def names_ge(caps, n):
    """Return the sorted capability names whose level is at least ``n``.

    Args:
        caps: Mapping of capability name to level. ``None`` or an empty
            mapping yields an empty list.
        n: Minimum level (inclusive) a capability must reach to be listed.

    Returns:
        Sorted list of the keys of ``caps`` whose value, converted with
        ``int()``, is ``>= n``. Entries whose value cannot be converted
        are skipped.
    """
    out = []
    for name, level in (caps or {}).items():
        try:
            reached = int(level) >= n
        except (TypeError, ValueError, OverflowError):
            # Level is not usable as a number (None, free text, NaN, inf):
            # leave this capability out instead of aborting the whole run.
            continue
        if reached:
            out.append(name)
    return sorted(out)
|
||||
|
||||
def _capability_facets(caps):
    """Derive the facet lists stored next to a point's ``capabilities`` mapping.

    Args:
        caps: The point's ``capabilities`` mapping (name -> level).

    Returns:
        Dict with ``capability_keys`` (all non-empty names, sorted) and
        ``capability_ge1`` / ``capability_ge2`` / ``capability_ge3`` (names
        whose level is at least 1 / 2 / 3, as computed by ``names_ge``).
    """
    return {
        "capability_keys": sorted([k for k in caps.keys() if k]),
        "capability_ge1": names_ge(caps, 1),
        "capability_ge2": names_ge(caps, 2),
        "capability_ge3": names_ge(caps, 3),
    }


scroll_filter = Filter(must=[])  # empty filter: match every point
offset = None
updated = 0

while True:
    points, offset = client.scroll(
        collection_name=COLL,
        scroll_filter=scroll_filter,
        # A plain boolean is the documented way to request the full payload.
        with_payload=True,
        limit=256,
        offset=offset,
    )

    # set_payload applies ONE payload to every point id it is given, so points
    # are grouped by their (identical) facet values and each group is written
    # with its own request.
    pending = {}
    for pt in points:
        p = pt.payload or {}
        facets = _capability_facets(p.get("capabilities") or {})

        # Only write when a facet field is missing or differs.
        if all(p.get(field) == value for field, value in facets.items()):
            continue

        signature = tuple(tuple(value) for value in facets.values())
        pending.setdefault(signature, (facets, []))[1].append(pt.id)

    changed = 0
    for facets, point_ids in pending.values():
        client.set_payload(collection_name=COLL, payload=facets, points=point_ids)
        changed += len(point_ids)

    if changed:
        updated += changed
        print(f"[Backfill] updated {changed} points…")

    # The last page comes back with no next offset; scrolling again with
    # offset=None would restart from the first point and never terminate.
    if not points or offset is None:
        break

print(f"[Backfill] done. total={updated}")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user