scripts/bootstrap_qdrant_plans.py aktualisiert
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 1s

This commit is contained in:
Lars 2025-08-13 08:56:08 +02:00
parent 0c143124b3
commit 93cdde13a7

View File

@ -1,106 +1,80 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Bootstrap (idempotent) for the Qdrant collections around plans, v1.3.0.

- Adds missing payload indexes (KEYWORD/FLOAT), idempotently.
- NEW: FLOAT index `plans.created_at_ts` for server-side time-window filters.

Carried over from v1.2.1:
- KEYWORD index `plan_section_names` in the `plans` collection (robust section filters).
- Materialized template facets (section_*) and the session indexes are kept.
- Idempotent: existing indexes are skipped cleanly.

Note:
- The script does NOT create collections; they are created on first upsert by the routers.
- Safe to run repeatedly.
"""
import os

from qdrant_client import QdrantClient
from qdrant_client.models import PayloadSchemaType
# -------------------------
# ENV / Defaults
# -------------------------
# Connection settings, overridable through the environment.
QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost")
QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))

# Collection names. PLAN_COLLECTION wins over QDRANT_COLLECTION_PLANS when it is
# set to a non-empty value; otherwise the latter (default "plans") is used.
PLANS = os.getenv("PLAN_COLLECTION") or os.getenv("QDRANT_COLLECTION_PLANS", "plans")
TEMPLATES = os.getenv("PLAN_TEMPLATE_COLLECTION", "plan_templates")
SESSIONS = os.getenv("PLAN_SESSION_COLLECTION", "plan_sessions")
# -------------------------
# Index-Spezifikation (KEYWORD)
# -------------------------
# Payload indexes to ensure, keyed by collection name. Each entry is a
# (payload field, schema type) pair.
# NOTE: the keys are the env-configurable collection names; if two of them are
# configured to the same value, the later entry replaces the earlier one.
INDEX_SPECS = {
    # Templates: materialized section_* fields.
    TEMPLATES: [
        ("discipline", PayloadSchemaType.KEYWORD),
        ("age_group", PayloadSchemaType.KEYWORD),
        ("target_group", PayloadSchemaType.KEYWORD),
        ("section_names", PayloadSchemaType.KEYWORD),
        ("section_must_keywords", PayloadSchemaType.KEYWORD),
        ("section_ideal_keywords", PayloadSchemaType.KEYWORD),
        ("section_supplement_keywords", PayloadSchemaType.KEYWORD),
        ("section_forbid_keywords", PayloadSchemaType.KEYWORD),
        ("goals", PayloadSchemaType.KEYWORD),
    ],
    # Plans: the nested path plus the materialized section names.
    PLANS: [
        ("discipline", PayloadSchemaType.KEYWORD),
        ("age_group", PayloadSchemaType.KEYWORD),
        ("target_group", PayloadSchemaType.KEYWORD),
        ("sections.name", PayloadSchemaType.KEYWORD),  # legacy, kept for compatibility
        ("plan_section_names", PayloadSchemaType.KEYWORD),
        ("goals", PayloadSchemaType.KEYWORD),
        ("created_by", PayloadSchemaType.KEYWORD),
        ("created_at", PayloadSchemaType.KEYWORD),
        ("created_at_ts", PayloadSchemaType.FLOAT),  # new in v1.3.0
        ("fingerprint", PayloadSchemaType.KEYWORD),
        ("title", PayloadSchemaType.KEYWORD),
    ],
    # Sessions
    SESSIONS: [
        ("plan_id", PayloadSchemaType.KEYWORD),
        ("executed_at", PayloadSchemaType.KEYWORD),
        ("coach", PayloadSchemaType.KEYWORD),
        ("group_label", PayloadSchemaType.KEYWORD),
    ],
}
# -------------------------
# Helpers
# -------------------------
def _create_indexes(client: QdrantClient, collection: str, specs) -> None:
    """Create the payload indexes listed in *specs* on *collection*.

    Args:
        client: Qdrant client used for all requests.
        collection: Name of the collection to index.
        specs: Iterable of ``(field_name, field_schema)`` pairs.

    The function never raises for Qdrant-side failures: if the collection
    lookup fails, a warning is printed and nothing is created; if creating a
    single index fails (the usual cause being that it already exists), the
    error is printed and the remaining fields are still processed.
    """
    # Probe the collection first; a missing collection is not fatal.
    try:
        client.get_collection(collection)
    except Exception as e:
        print(f"[Bootstrap v1.3.0] WARN: Collection '{collection}' nicht gefunden (wird beim ersten Upsert erstellt). Details: {e}")
        return
    print(f"[Bootstrap v1.3.0] Collection '{collection}' ok.")

    for field, schema in specs:
        try:
            client.create_payload_index(
                collection_name=collection,
                field_name=field,
                field_schema=schema,
            )
        except Exception as e:
            # Deliberately best-effort: report the failure and keep going so
            # that re-running the script stays harmless.
            print(f"[Bootstrap v1.3.0] Index exists or skipped: {collection}.{field} -> {e}")
        else:
            print(f"[Bootstrap v1.3.0] Index created: {collection}.{field} ({schema})")
# -------------------------
# Main
# -------------------------
def main() -> None:
    """Connect to Qdrant and ensure every index listed in ``INDEX_SPECS``.

    Prints the target host/port and the resolved collection names, then
    processes each collection in turn and prints a final "done" line.
    """
    print(f"[Bootstrap v1.3.0] Qdrant @ {QDRANT_HOST}:{QDRANT_PORT}")
    print(f"[Bootstrap v1.3.0] Collections: TEMPLATES={TEMPLATES} PLANS={PLANS} SESSIONS={SESSIONS}")

    client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
    for coll, specs in INDEX_SPECS.items():
        _create_indexes(client, coll, specs)

    print("[Bootstrap v1.3.0] done.")
# Run the bootstrap only when executed as a script, not on import.
if __name__ == "__main__":
    main()