Trainer_LLM/scripts/bootstrap_qdrant_exercises.py
Lars efbb978074
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s
scripts/bootstrap_qdrant_exercises.py aktualisiert
2025-08-11 19:16:16 +02:00

63 lines
2.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Idempotenter Bootstrap für die Qdrant-Collection 'exercises':
- Legt fehlende Payload-Indizes an (ohne Collection zu droppen)
- Optional prüft Dimension/Distanz
Starten mit
pip install qdrant-client --upgrade
python3 scripts/bootstrap_qdrant_exercises.py
"""
import os
from qdrant_client import QdrantClient
from qdrant_client.models import PayloadSchemaType
# Connection settings; each one can be overridden through the environment
# variable of the same name (EXERCISE_COLLECTION for the collection name).
QDRANT_HOST = os.environ.get("QDRANT_HOST", "localhost")
QDRANT_PORT = int(os.environ.get("QDRANT_PORT", "6333"))
COLLECTION = os.environ.get("EXERCISE_COLLECTION", "exercises")
# (field name, schema type) pairs for every payload index the collection
# should carry. All fields are indexed as KEYWORD, so the pairs are derived
# from the plain list of field names; order is preserved.
INDEX_SPECS = [
    (field_name, PayloadSchemaType.KEYWORD)
    for field_name in (
        "keywords",
        "equipment",
        "discipline",
        "age_group",
        "target_group",
        "category",
        "capability_keys",
        "capability_ge1",
        "capability_ge2",
        "capability_ge3",
        # Newer additions:
        "capability_ge4",
        "capability_ge5",
        "capability_eq1",
        "capability_eq2",
        "capability_eq3",
        "capability_eq4",
        "capability_eq5",
    )
]
def main():
    """Ensure every payload index listed in INDEX_SPECS exists on COLLECTION.

    Connects to Qdrant at QDRANT_HOST:QDRANT_PORT, fetches the collection
    info (which doubles as an existence check) and creates each payload
    index the server does not already report. Fields that are already
    indexed are skipped up front, so an exception from
    ``create_payload_index`` is reported as a failure instead of being
    mistaken for "index already exists". A failure on one field does not
    stop the remaining fields from being processed.

    Returns:
        list[str]: Names of the fields whose index could not be created;
        empty when every index exists or was created.

    Raises:
        Exception: Whatever ``get_collection`` raises, e.g. when the
            collection does not exist.
    """
    client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)

    # Sanity check: this call is expected to raise if the collection is missing.
    info = client.get_collection(COLLECTION)
    print(f"[Bootstrap] Collection '{COLLECTION}' gefunden. Vectors={info.config.params.vectors}")

    # Payload fields the server already reports an index for. Falls back to
    # an empty mapping if the attribute is absent or None.
    existing = getattr(info, "payload_schema", None) or {}

    failed = []
    for field, schema in INDEX_SPECS:
        if field in existing:
            print(f"[Bootstrap] Index exists, skipped: {field}")
            continue
        try:
            client.create_payload_index(
                collection_name=COLLECTION,
                field_name=field,
                field_schema=schema,
            )
        except Exception as e:
            # Best effort: record the failure and keep going with the
            # remaining fields instead of aborting the whole bootstrap.
            failed.append(field)
            print(f"[Bootstrap] Index creation failed: {field} -> {e}")
        else:
            print(f"[Bootstrap] Index created: {field} ({schema})")

    if failed:
        print(f"[Bootstrap] {len(failed)} index(es) could not be created: {', '.join(failed)}")
    print("[Bootstrap] done.")
    return failed


if __name__ == "__main__":
    main()