scripts/bootstrap_qdrant_plans.py aktualisiert
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 1s
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 1s
This commit is contained in:
parent
0c143124b3
commit
93cdde13a7
|
|
@ -1,106 +1,80 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""
|
"""
|
||||||
Bootstrap (idempotent) für Qdrant-Collections rund um Pläne – v1.2.1
|
Bootstrap (idempotent) für Qdrant-Collections rund um Pläne – v1.3.0
|
||||||
|
|
||||||
Änderungen ggü. v1.1.0:
|
- Fügt fehlende Payload-Indizes hinzu (KEYWORD/FLOAT), idempotent.
|
||||||
- NEU: KEYWORD-Index `plan_section_names` in der `plans`-Collection (robuste Section-Filter).
|
- NEU: FLOAT-Index `plans.created_at_ts` für serverseitige Zeitfensterfilter.
|
||||||
- Beibehalt: materialisierte Template-Facetten (section_*), Sessions-Indizes.
|
|
||||||
- Idempotent: existierende Indizes werden sauber übersprungen.
|
|
||||||
|
|
||||||
Hinweis:
|
|
||||||
- Das Script erstellt KEINE Collections; sie werden beim ersten Upsert durch die Router angelegt.
|
|
||||||
- Läuft gefahrlos mehrfach.
|
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
from qdrant_client import QdrantClient
|
from qdrant_client import QdrantClient
|
||||||
from qdrant_client.models import PayloadSchemaType
|
from qdrant_client.models import PayloadSchemaType
|
||||||
|
|
||||||
# -------------------------
|
|
||||||
# ENV / Defaults
|
|
||||||
# -------------------------
|
|
||||||
# Connection settings for the Qdrant server; both can be overridden via the
# environment.
QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost")
QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))

# Collection names. PLAN_COLLECTION takes precedence over
# QDRANT_COLLECTION_PLANS. Chaining with `or` (instead of passing "plans" as
# the getenv default) makes an *empty* QDRANT_COLLECTION_PLANS fall back to
# "plans" as well, rather than yielding an empty collection name.
PLANS = os.getenv("PLAN_COLLECTION") or os.getenv("QDRANT_COLLECTION_PLANS") or "plans"
TEMPLATES = os.getenv("PLAN_TEMPLATE_COLLECTION", "plan_templates")
SESSIONS = os.getenv("PLAN_SESSION_COLLECTION", "plan_sessions")
|
||||||
|
|
||||||
# -------------------------
|
|
||||||
# Index-Spezifikation (KEYWORD)
|
|
||||||
# -------------------------
|
|
||||||
# Payload indexes to ensure, keyed by collection name.
# Each entry is a (payload field name, PayloadSchemaType) pair.
INDEX_SPECS = {
    # Templates: rely on materialized section_* fields for stable indexes.
    TEMPLATES: [
        ("discipline", PayloadSchemaType.KEYWORD),
        ("age_group", PayloadSchemaType.KEYWORD),
        ("target_group", PayloadSchemaType.KEYWORD),
        ("section_names", PayloadSchemaType.KEYWORD),
        ("section_must_keywords", PayloadSchemaType.KEYWORD),
        ("section_ideal_keywords", PayloadSchemaType.KEYWORD),
        ("section_supplement_keywords", PayloadSchemaType.KEYWORD),
        ("section_forbid_keywords", PayloadSchemaType.KEYWORD),
        ("goals", PayloadSchemaType.KEYWORD),
    ],
    # Plans: the nested path is kept next to the materialized section names.
    PLANS: [
        ("discipline", PayloadSchemaType.KEYWORD),
        ("age_group", PayloadSchemaType.KEYWORD),
        ("target_group", PayloadSchemaType.KEYWORD),
        ("sections.name", PayloadSchemaType.KEYWORD),  # legacy, kept for compatibility
        ("plan_section_names", PayloadSchemaType.KEYWORD),
        ("goals", PayloadSchemaType.KEYWORD),
        ("created_by", PayloadSchemaType.KEYWORD),
        ("created_at", PayloadSchemaType.KEYWORD),
        ("created_at_ts", PayloadSchemaType.FLOAT),  # new in v1.3.0: server-side time-window filters
        ("fingerprint", PayloadSchemaType.KEYWORD),
        ("title", PayloadSchemaType.KEYWORD),
    ],
    # Sessions
    SESSIONS: [
        ("plan_id", PayloadSchemaType.KEYWORD),
        ("executed_at", PayloadSchemaType.KEYWORD),
        ("coach", PayloadSchemaType.KEYWORD),
        ("group_label", PayloadSchemaType.KEYWORD),
    ],
}
|
||||||
|
|
||||||
# -------------------------
|
|
||||||
# Helpers
|
|
||||||
# -------------------------
|
|
||||||
def _create_indexes(client: QdrantClient, collection: str, specs) -> None:
    """Create the payload indexes listed in *specs* on *collection*.

    Best-effort: nothing is raised to the caller. Every outcome is reported
    on stdout.

    Args:
        client: Qdrant client used for all requests.
        collection: Name of the collection to index.
        specs: Iterable of ``(field_name, field_schema)`` pairs; the schema
            is passed through unchanged to ``create_payload_index``.
    """
    # A collection that cannot be fetched is not fatal: warn and skip all of
    # its indexes.
    try:
        client.get_collection(collection)
    except Exception as e:
        print(f"[Bootstrap v1.3.0] WARN: Collection '{collection}' nicht gefunden (wird beim ersten Upsert erstellt). Details: {e}")
        return
    else:
        print(f"[Bootstrap v1.3.0] Collection '{collection}' ok.")

    for field, schema in specs:
        try:
            client.create_payload_index(collection_name=collection, field_name=field, field_schema=schema)
        except Exception as e:
            # NOTE: any failure lands here, not only "index already exists";
            # the error text is printed so real problems remain visible.
            print(f"[Bootstrap v1.3.0] Index exists or skipped: {collection}.{field} -> {e}")
        else:
            print(f"[Bootstrap v1.3.0] Index created: {collection}.{field} ({schema})")
|
|
||||||
|
|
||||||
# -------------------------
|
|
||||||
# Main
|
|
||||||
# -------------------------
|
|
||||||
def main():
    """Connect to Qdrant and ensure the payload indexes from INDEX_SPECS.

    Prints the target host/port and the resolved collection names, then runs
    ``_create_indexes`` once per collection listed in ``INDEX_SPECS``.
    """
    print(f"[Bootstrap v1.3.0] Qdrant @ {QDRANT_HOST}:{QDRANT_PORT}")
    print(f"[Bootstrap v1.3.0] Collections: TEMPLATES={TEMPLATES} PLANS={PLANS} SESSIONS={SESSIONS}")
    client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)

    for coll, specs in INDEX_SPECS.items():
        _create_indexes(client, coll, specs)

    print("[Bootstrap v1.3.0] done.")
|
|
||||||
|
|
||||||
# Run the bootstrap only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||||
Loading…
Reference in New Issue
Block a user