From 93cdde13a79322fcfdfd84a19eeed38aeff262e3 Mon Sep 17 00:00:00 2001 From: Lars Date: Wed, 13 Aug 2025 08:56:08 +0200 Subject: [PATCH] scripts/bootstrap_qdrant_plans.py aktualisiert --- scripts/bootstrap_qdrant_plans.py | 100 +++++++++++------------------- 1 file changed, 37 insertions(+), 63 deletions(-) diff --git a/scripts/bootstrap_qdrant_plans.py b/scripts/bootstrap_qdrant_plans.py index c1071d3..a403ba1 100644 --- a/scripts/bootstrap_qdrant_plans.py +++ b/scripts/bootstrap_qdrant_plans.py @@ -1,106 +1,80 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -Bootstrap (idempotent) für Qdrant-Collections rund um Pläne – v1.2.1 +Bootstrap (idempotent) für Qdrant-Collections rund um Pläne – v1.3.0 -Änderungen ggü. v1.1.0: -- NEU: KEYWORD-Index `plan_section_names` in der `plans`-Collection (robuste Section-Filter). -- Beibehalt: materialisierte Template-Facetten (section_*), Sessions-Indizes. -- Idempotent: existierende Indizes werden sauber übersprungen. - -Hinweis: -- Das Script erstellt KEINE Collections; sie werden beim ersten Upsert durch die Router angelegt. -- Läuft gefahrlos mehrfach. +- Fügt fehlende Payload-Indizes hinzu (KEYWORD/FLOAT), idempotent. +- NEU: FLOAT-Index `plans.created_at_ts` für serverseitige Zeitfensterfilter. """ import os from qdrant_client import QdrantClient from qdrant_client.models import PayloadSchemaType -# ------------------------- -# ENV / Defaults -# ------------------------- QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost") QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333")) -PLANS = os.getenv("PLAN_COLLECTION") or os.getenv("QDRANT_COLLECTION_PLANS", "plans") +PLANS = os.getenv("PLAN_COLLECTION") or os.getenv("QDRANT_COLLECTION_PLANS", "plans") TEMPLATES = os.getenv("PLAN_TEMPLATE_COLLECTION", "plan_templates") -SESSIONS = os.getenv("PLAN_SESSION_COLLECTION", "plan_sessions") +SESSIONS = os.getenv("PLAN_SESSION_COLLECTION", "plan_sessions") -# ------------------------- -# Index-Spezifikation (KEYWORD) -# ------------------------- INDEX_SPECS = { - # Templates: nutzen materialisierte Felder für stabile Indizes TEMPLATES: [ - ("discipline", PayloadSchemaType.KEYWORD), - ("age_group", PayloadSchemaType.KEYWORD), - ("target_group", PayloadSchemaType.KEYWORD), - ("section_names", PayloadSchemaType.KEYWORD), - ("section_must_keywords", PayloadSchemaType.KEYWORD), - ("section_ideal_keywords", PayloadSchemaType.KEYWORD), + ("discipline", PayloadSchemaType.KEYWORD), + ("age_group", PayloadSchemaType.KEYWORD), + ("target_group", PayloadSchemaType.KEYWORD), + ("section_names", PayloadSchemaType.KEYWORD), + ("section_must_keywords", PayloadSchemaType.KEYWORD), + ("section_ideal_keywords", PayloadSchemaType.KEYWORD), ("section_supplement_keywords", PayloadSchemaType.KEYWORD), - ("section_forbid_keywords", PayloadSchemaType.KEYWORD), - ("goals", PayloadSchemaType.KEYWORD), + ("section_forbid_keywords", PayloadSchemaType.KEYWORD), + ("goals", PayloadSchemaType.KEYWORD), ], - # Plans: zusätzlich zum verschachtelten Pfad auch materialisierte Section-Namen PLANS: [ - ("discipline", PayloadSchemaType.KEYWORD), - ("age_group", PayloadSchemaType.KEYWORD), - ("target_group", PayloadSchemaType.KEYWORD), - ("sections.name", PayloadSchemaType.KEYWORD), # Alt; belassen für Kompatibilität - ("plan_section_names", PayloadSchemaType.KEYWORD), # Neu & robust - ("goals", PayloadSchemaType.KEYWORD), - ("created_by", PayloadSchemaType.KEYWORD), - ("created_at", PayloadSchemaType.KEYWORD), - ("fingerprint", PayloadSchemaType.KEYWORD), - ("title", PayloadSchemaType.KEYWORD), + ("discipline", PayloadSchemaType.KEYWORD), + ("age_group", PayloadSchemaType.KEYWORD), + ("target_group", PayloadSchemaType.KEYWORD), + ("sections.name", PayloadSchemaType.KEYWORD), # legacy, belassen + ("plan_section_names", PayloadSchemaType.KEYWORD), + ("goals", PayloadSchemaType.KEYWORD), + ("created_by", PayloadSchemaType.KEYWORD), + ("created_at", PayloadSchemaType.KEYWORD), + ("created_at_ts", PayloadSchemaType.FLOAT), # NEU + ("fingerprint", PayloadSchemaType.KEYWORD), + ("title", PayloadSchemaType.KEYWORD), ], - # Sessions SESSIONS: [ - ("plan_id", PayloadSchemaType.KEYWORD), + ("plan_id", PayloadSchemaType.KEYWORD), ("executed_at", PayloadSchemaType.KEYWORD), - ("coach", PayloadSchemaType.KEYWORD), + ("coach", PayloadSchemaType.KEYWORD), ("group_label", PayloadSchemaType.KEYWORD), ], } -# ------------------------- -# Helpers -# ------------------------- + def _create_indexes(client: QdrantClient, collection: str, specs): - """Erzeugt fehlende Payload-Indizes als KEYWORD. Überspringt existierende.""" - # Existenz der Collection prüfen (nicht fatal, falls fehlend) try: client.get_collection(collection) - print(f"[Bootstrap v1.2.1] Collection '{collection}' ok.") + print(f"[Bootstrap v1.3.0] Collection '{collection}' ok.") except Exception as e: - print(f"[Bootstrap v1.2.1] WARN: Collection '{collection}' nicht gefunden (wird beim ersten Upsert erstellt). Details: {e}") + print(f"[Bootstrap v1.3.0] WARN: Collection '{collection}' nicht gefunden (wird beim ersten Upsert erstellt). Details: {e}") return for field, schema in specs: try: - client.create_payload_index( - collection_name=collection, - field_name=field, - field_schema=schema, - ) - print(f"[Bootstrap v1.2.1] Index created: {collection}.{field} ({schema.name.lower()})") + client.create_payload_index(collection_name=collection, field_name=field, field_schema=schema) + print(f"[Bootstrap v1.3.0] Index created: {collection}.{field} ({schema})") except Exception as e: - # Meist: Already exists -> überspringen - print(f"[Bootstrap v1.2.1] Index exists or skipped: {collection}.{field} -> {e}") + print(f"[Bootstrap v1.3.0] Index exists or skipped: {collection}.{field} -> {e}") + -# ------------------------- -# Main -# ------------------------- def main(): - print(f"[Bootstrap v1.2.1] Qdrant @ {QDRANT_HOST}:{QDRANT_PORT}") - print(f"[Bootstrap v1.2.1] Collections: TEMPLATES={TEMPLATES} PLANS={PLANS} SESSIONS={SESSIONS}") + print(f"[Bootstrap v1.3.0] Qdrant @ {QDRANT_HOST}:{QDRANT_PORT}") + print(f"[Bootstrap v1.3.0] Collections: TEMPLATES={TEMPLATES} PLANS={PLANS} SESSIONS={SESSIONS}") client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT) - for coll, specs in INDEX_SPECS.items(): _create_indexes(client, coll, specs) + print("[Bootstrap v1.3.0] done.") - print("[Bootstrap v1.2.1] done.") if __name__ == "__main__": - main() + main() \ No newline at end of file