Trainer_LLM/scripts/bootstrap_qdrant_plans.py
Lars 070f9967bc
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 1s
scripts/bootstrap_qdrant_plans.py aktualisiert
2025-08-13 08:02:43 +02:00

107 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Bootstrap (idempotent) für Qdrant-Collections rund um Pläne v1.2.1
Änderungen ggü. v1.1.0:
- NEU: KEYWORD-Index `plan_section_names` in der `plans`-Collection (robuste Section-Filter).
- Beibehalt: materialisierte Template-Facetten (section_*), Sessions-Indizes.
- Idempotent: existierende Indizes werden sauber übersprungen.
Hinweis:
- Das Script erstellt KEINE Collections; sie werden beim ersten Upsert durch die Router angelegt.
- Läuft gefahrlos mehrfach.
"""
import os
from qdrant_client import QdrantClient
from qdrant_client.models import PayloadSchemaType
# -------------------------
# ENV / Defaults
# -------------------------
# Connection settings of the Qdrant instance (overridable via environment).
QDRANT_HOST = os.environ.get("QDRANT_HOST", "localhost")
QDRANT_PORT = int(os.environ.get("QDRANT_PORT", "6333"))

# Collection names. For the plans collection a non-empty PLAN_COLLECTION wins;
# otherwise QDRANT_COLLECTION_PLANS is used, falling back to "plans".
PLANS = os.environ.get("PLAN_COLLECTION") or os.environ.get("QDRANT_COLLECTION_PLANS", "plans")
TEMPLATES = os.environ.get("PLAN_TEMPLATE_COLLECTION", "plan_templates")
SESSIONS = os.environ.get("PLAN_SESSION_COLLECTION", "plan_sessions")
# -------------------------
# Index specification (KEYWORD)
# -------------------------
# Every field listed below is indexed with the KEYWORD payload schema.
_KEYWORD = PayloadSchemaType.KEYWORD


def _keyword_specs(*field_names):
    """Pair each field name with the KEYWORD schema, preserving the given order."""
    return [(field_name, _KEYWORD) for field_name in field_names]


# Maps collection name -> list of (field_name, field_schema) pairs.
INDEX_SPECS = {
    # Templates: use materialized fields so the indexes stay stable.
    TEMPLATES: _keyword_specs(
        "discipline",
        "age_group",
        "target_group",
        "section_names",
        "section_must_keywords",
        "section_ideal_keywords",
        "section_supplement_keywords",
        "section_forbid_keywords",
        "goals",
    ),
    # Plans: materialized section names in addition to the nested path.
    PLANS: _keyword_specs(
        "discipline",
        "age_group",
        "target_group",
        "sections.name",  # legacy; kept for compatibility
        "plan_section_names",  # new and robust
        "goals",
        "created_by",
        "created_at",
        "fingerprint",
        "title",
    ),
    # Sessions
    SESSIONS: _keyword_specs(
        "plan_id",
        "executed_at",
        "coach",
        "group_label",
    ),
}
# -------------------------
# Helpers
# -------------------------
def _create_indexes(client: "QdrantClient", collection: str, specs):
    """Create the payload indexes listed in ``specs`` on ``collection``.

    The function is best-effort: a failing client call is reported on stdout
    and never propagated, so one bad collection or field does not abort the
    remaining work.

    Args:
        client: Qdrant client (any object offering ``get_collection`` and
            ``create_payload_index`` with the keyword arguments used below).
        collection: Name of the collection to index.
        specs: Iterable of ``(field_name, field_schema)`` pairs. ``None`` or
            an empty iterable means there is nothing to create.

    Returns:
        None.
    """
    # Probe the collection first; a failure here is not fatal, the
    # collection is simply skipped.
    try:
        client.get_collection(collection)
    except Exception as e:
        print(f"[Bootstrap v1.2.1] WARN: Collection '{collection}' nicht gefunden (wird beim ersten Upsert erstellt). Details: {e}")
        return
    else:
        print(f"[Bootstrap v1.2.1] Collection '{collection}' ok.")

    for field, schema in specs or ():
        # Keep the try body limited to the client call so that a problem
        # while formatting the success message cannot be misreported as
        # "exists or skipped" after the index was in fact created.
        try:
            client.create_payload_index(
                collection_name=collection,
                field_name=field,
                field_schema=schema,
            )
        except Exception as e:
            # Per the original author this is mostly "already exists"; skip.
            print(f"[Bootstrap v1.2.1] Index exists or skipped: {collection}.{field} -> {e}")
        else:
            # Enum members expose ``.name``; fall back to the value itself
            # for schemas given as plain strings or parameter objects.
            schema_label = str(getattr(schema, "name", schema)).lower()
            print(f"[Bootstrap v1.2.1] Index created: {collection}.{field} ({schema_label})")
# -------------------------
# Main
# -------------------------
def main():
    """Print the effective settings, connect to Qdrant and bootstrap every spec.

    Iterates over ``INDEX_SPECS`` and hands each collection with its index
    specification to ``_create_indexes``.
    """
    print(f"[Bootstrap v1.2.1] Qdrant @ {QDRANT_HOST}:{QDRANT_PORT}")
    print(f"[Bootstrap v1.2.1] Collections: TEMPLATES={TEMPLATES} PLANS={PLANS} SESSIONS={SESSIONS}")

    qdrant = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
    for collection_name, index_specs in INDEX_SPECS.items():
        _create_indexes(qdrant, collection_name, index_specs)

    print("[Bootstrap v1.2.1] done.")


if __name__ == "__main__":
    main()