Trainer_LLM/scripts/bootstrap_qdrant_plans.py
Lars 4d67cd9d66
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s
scripts/bootstrap_qdrant_plans.py aktualisiert
2025-08-12 12:52:41 +02:00

88 lines
3.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Idempotent bootstrap for the Qdrant collections around plans v1.1.0.

- Creates missing payload indexes as KEYWORD (where it makes sense).
- Uses materialized facet fields on templates (section_*) so that nested
  paths can be indexed reliably.
- The collections themselves are NOT created here; that happens on the
  first upsert.

Usage:
    pip install qdrant-client --upgrade
    python3 scripts/bootstrap_qdrant_plans.py
"""
import os
from qdrant_client import QdrantClient
from qdrant_client.models import PayloadSchemaType
# Connection settings for the Qdrant instance; both can be overridden through
# the environment.
QDRANT_HOST = os.environ.get("QDRANT_HOST", "localhost")
QDRANT_PORT = int(os.environ.get("QDRANT_PORT", "6333"))

# Collection names. PLAN_COLLECTION wins when it is set and non-empty;
# otherwise QDRANT_COLLECTION_PLANS is used, falling back to "plans".
PLANS = os.environ.get("PLAN_COLLECTION") or os.environ.get(
    "QDRANT_COLLECTION_PLANS", "plans"
)
TEMPLATES = os.environ.get("PLAN_TEMPLATE_COLLECTION", "plan_templates")
SESSIONS = os.environ.get("PLAN_SESSION_COLLECTION", "plan_sessions")
# Every payload index declared below uses the same schema type.
_KEYWORD = PayloadSchemaType.KEYWORD

# Maps a collection name to the list of (payload field, schema type) pairs
# that should be indexed on that collection.
INDEX_SPECS = {
    # Templates: rely on the materialized section_* fields so the indexes
    # stay stable.
    TEMPLATES: [
        (field, _KEYWORD)
        for field in (
            "discipline",
            "age_group",
            "target_group",
            "section_names",
            "section_must_keywords",
            "section_ideal_keywords",
            "section_supplement_keywords",
            "section_forbid_keywords",
            "goals",
        )
    ],
    # Plans: the nested path "sections.name" is kept as before. If the Qdrant
    # version in use does not support it, the field can be materialized the
    # same way as on templates (section_names) and added here.
    # NOTE(review): created_at is indexed as KEYWORD, which only serves
    # exact-match filters -- confirm no range queries on it are expected.
    PLANS: [
        (field, _KEYWORD)
        for field in (
            "discipline",
            "age_group",
            "target_group",
            "sections.name",
            "goals",
            "created_by",
            "created_at",
            "fingerprint",
            "title",
        )
    ],
    SESSIONS: [
        (field, _KEYWORD)
        for field in (
            "plan_id",
            "executed_at",
            "coach",
            "group_label",
        )
    ],
}
def _create_indexes(client: QdrantClient, collection: str, specs):
    """Create the payload indexes listed in *specs* on one collection.

    Args:
        client: Qdrant client used for every request.
        collection: Name of the collection to index.
        specs: Iterable of ``(field_name, field_schema)`` pairs.

    If ``client.get_collection`` raises, a warning is printed and no index
    is attempted. An exception while creating a single index is printed and
    the loop continues with the next field.
    """
    # The collection is expected to exist already; a failed lookup is
    # reported but deliberately not treated as fatal.
    try:
        client.get_collection(collection)
        print(f"[Bootstrap v1.1.0] Collection '{collection}' ok.")
    except Exception as lookup_error:
        print(f"[Bootstrap v1.1.0] WARN: Collection '{collection}' nicht gefunden (wird beim ersten Upsert erstellt). Details: {lookup_error}")
        return

    for field_name, field_schema in specs:
        index_request = {
            "collection_name": collection,
            "field_name": field_name,
            "field_schema": field_schema,
        }
        try:
            client.create_payload_index(**index_request)
            print(f"[Bootstrap v1.1.0] Index created: {collection}.{field_name} ({field_schema})")
        except Exception as index_error:
            print(f"[Bootstrap v1.1.0] Index exists or skipped: {collection}.{field_name} -> {index_error}")
def main():
    """Connect to Qdrant and create all payload indexes from ``INDEX_SPECS``.

    Prints the connection target and the collection names in use, then
    hands each collection and its specs to ``_create_indexes``. The client
    is closed before returning, also when processing raises.
    """
    print(f"[Bootstrap v1.1.0] Qdrant @ {QDRANT_HOST}:{QDRANT_PORT}")
    print(f"[Bootstrap v1.1.0] Collections: TEMPLATES={TEMPLATES} PLANS={PLANS} SESSIONS={SESSIONS}")
    client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
    try:
        for coll, specs in INDEX_SPECS.items():
            _create_indexes(client, coll, specs)
    finally:
        # Release the client's connections explicitly instead of relying on
        # interpreter shutdown to clean them up.
        client.close()
    print("[Bootstrap v1.1.0] done.")
# Run the bootstrap only when executed as a script, not when imported.
if __name__ == "__main__":
    main()