#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Backfill capability facets in Qdrant – v1.2.

Fix: terminates correctly when `next_page_offset` (offset) is None.
"""
import os
from typing import Dict, Any, List

from qdrant_client import QdrantClient

COLL = os.getenv("EXERCISE_COLLECTION", "exercises")
QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost")
QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))
BATCH = int(os.getenv("BACKFILL_BATCH", "256"))


def _facet_capabilities(caps: Dict[str, Any]) -> Dict[str, List[str]]:
    """Derive flat, filterable facet lists from a ``capabilities`` mapping.

    Args:
        caps: Mapping of capability name -> level. ``None`` or an empty
            mapping yields empty facet lists. Keys are stringified and
            stripped; keys that are blank after stripping are ignored.
            Levels that cannot be converted with ``int()`` count as 0.

    Returns:
        A dict with the keys ``capability_keys``, ``capability_ge1`` ..
        ``capability_ge5`` and ``capability_eq1`` .. ``capability_eq5``.
        Each value is a de-duplicated list of capability names sorted
        case-insensitively.
    """
    caps = caps or {}

    # Parse the mapping once instead of once per facet.
    levelled = []
    for key, value in caps.items():
        name = str(key).strip()
        if not name:
            continue
        try:
            level = int(value)
        except (TypeError, ValueError, OverflowError):
            # Best effort: unparsable levels (None, "abc", inf, lists, ...)
            # are treated as level 0.
            level = 0
        levelled.append((name, level))

    def ordered(names) -> List[str]:
        # The raw string breaks ties between names that differ only by case.
        # Without it, their relative order would follow set iteration order,
        # which changes between processes (string hash randomization) and
        # would make the change detection in main() report spurious diffs.
        return sorted(set(names), key=lambda s: (s.casefold(), s))

    facets: Dict[str, List[str]] = {
        "capability_keys": ordered(name for name, _ in levelled),
    }
    for threshold in range(1, 6):
        facets[f"capability_ge{threshold}"] = ordered(
            name for name, level in levelled if level >= threshold
        )
    for exact in range(1, 6):
        facets[f"capability_eq{exact}"] = ordered(
            name for name, level in levelled if level == exact
        )
    return facets


def main() -> None:
    """Scroll through the collection and write capability facets where needed.

    Reads every point of collection ``COLL`` in pages of ``BATCH`` points,
    computes the facets from each point's ``capabilities`` payload field and
    calls ``set_payload`` only for points whose stored facets differ.
    Progress is printed per page.
    """
    client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
    info = client.get_collection(COLL)
    print(f"[Backfill] Collection '{COLL}' ok – vectors={info.config.params.vectors}")

    updated_total = 0
    offset = None
    page = 0

    while True:
        points, next_offset = client.scroll(
            collection_name=COLL,
            scroll_filter=None,
            offset=offset,
            limit=BATCH,
            with_payload=True,
        )
        page += 1
        if not points:
            print("[Backfill] no more points – done")
            break

        updated_page = 0
        for pt in points:
            pld = pt.payload or {}
            caps = pld.get("capabilities") or {}
            facets = _facet_capabilities(caps)

            # Only write when at least one facet actually changes.
            need = any(pld.get(k) != v for k, v in facets.items())
            if not need:
                continue

            client.set_payload(collection_name=COLL, points=[pt.id], payload=facets)
            updated_total += 1
            updated_page += 1

        print(f"[Backfill] page={page} processed={len(points)} updated_page={updated_page} updated_total={updated_total}")

        # End reached? Then stop after this page.
        if next_offset is None:
            break
        offset = next_offset

    print(f"[Backfill] done. total_updated={updated_total}")


if __name__ == "__main__":
    main()