Compare commits
87 Commits
stable-202
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| cadd23e554 | |||
| ad6df74ef4 | |||
| 9327bc48d8 | |||
| 508fafd0df | |||
| 1d50e7042e | |||
| 6a4e97f4e4 | |||
| 59e7e64af7 | |||
| 249f1aeea0 | |||
| 0b34b85a5a | |||
| 00a8837aa1 | |||
| 5e2591fb56 | |||
| 93cdde13a7 | |||
| 0c143124b3 | |||
| 58d2260d89 | |||
| 070f9967bc | |||
| d2af8881a8 | |||
| 9fd41ce3f0 | |||
| d9eefcb1fa | |||
| 78cf89c0fa | |||
| c0de60e4a5 | |||
| 123df8a48a | |||
| 7f821b5723 | |||
| 597b94ff25 | |||
| 375ed57778 | |||
| 41e5db3921 | |||
| af08c64032 | |||
| ed05448e56 | |||
| ff58caaad0 | |||
| 36c82ac942 | |||
| 4fbfdb1c6a | |||
| 0805e48fe6 | |||
| 427d3f5419 | |||
| 16890af944 | |||
| 32e673044f | |||
| 40b1151023 | |||
| 5c51d3bc4f | |||
| 1dbcf33540 | |||
| 3806f4ac47 | |||
| 4d67cd9d66 | |||
| 9c955db191 | |||
| 81473e20eb | |||
| 798e103eb8 | |||
| d65129f477 | |||
| 482605e6a1 | |||
| 47b2519b0b | |||
| c3b2ee3310 | |||
| 31d1e85b5c | |||
| 5dbe887ce3 | |||
| 4552e33cb3 | |||
| c53aade360 | |||
| d88979e37a | |||
| 11373138ca | |||
| 75b257bb15 | |||
| c0bb562a8d | |||
| 32577a7fda | |||
| a6d68134cd | |||
| fa8a92208a | |||
| 0c047b708f | |||
| efbb978074 | |||
| 21ce1dc395 | |||
| 380b361e70 | |||
| 2a859aa16c | |||
| d14ed9a511 | |||
| 7b383f0778 | |||
| 34320b46d9 | |||
| e12fd8f96a | |||
| cf085f8ef0 | |||
| 6bab3cdf04 | |||
| 2567d8c786 | |||
| d8d12e0b6b | |||
| bba860d839 | |||
| a0d1b86b53 | |||
| 605fe2ebaf | |||
| 811510ac66 | |||
| b2808f82df | |||
| 97bc283ce1 | |||
| d9abcb3ef4 | |||
| 5c9c8951c1 | |||
| 633916d5b7 | |||
| 55a1f5bba2 | |||
| 7d486f0a41 | |||
| 88672a19d5 | |||
| d3b781679a | |||
| a02008ec17 | |||
| 8302a7fecf | |||
| 4d5d36d6e7 | |||
| 8b6b47c63e |
|
|
@ -3,10 +3,7 @@ name: Deploy Trainer_LLM to llm-node
|
|||
on:
|
||||
push:
|
||||
branches: [ "main" ]
|
||||
paths:
|
||||
- "knowledge/**"
|
||||
- "llm-api/**"
|
||||
- "scripts/**"
|
||||
# Kein paths-Filter mehr: neue Ordner deployen sofort mit, sobald sie in DEPLOY_DIRS stehen.
|
||||
|
||||
concurrency:
|
||||
group: deploy-trainer-llm
|
||||
|
|
@ -14,56 +11,52 @@ concurrency:
|
|||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: linux_host # muss zum Runner-Label passen
|
||||
runs-on: linux_host
|
||||
env:
|
||||
# -> Hier trägst du ALLE zu deployenden Top-Level-Verzeichnisse ein:
|
||||
# Bei neuen Ordnern einfach anhängen (durch Leerzeichen getrennt)
|
||||
DEPLOY_DIRS: "knowledge llm-api scripts schemas tests"
|
||||
TARGET_BASE: "/home/llmadmin"
|
||||
steps:
|
||||
- name: Checkout
|
||||
# Absolute URL reduziert Abhängigkeit von DEFAULT_ACTIONS_URL
|
||||
uses: https://github.com/actions/checkout@v4
|
||||
|
||||
- name: Sanity — Runner & Commit
|
||||
run: |
|
||||
echo "Runner: $RUNNER_NAME Labels: $RUNNER_LABELS"
|
||||
echo "Commit: $GITEA_SHA Ref: $GITEA_REF"
|
||||
echo "Commit: ${GITHUB_SHA:-$GITEA_SHA} Ref: ${GITHUB_REF:-$GITEA_REF}"
|
||||
uname -a
|
||||
|
||||
- name: Debug whoami & write test
|
||||
- name: Ensure target base exists
|
||||
run: |
|
||||
whoami
|
||||
id
|
||||
getent passwd $(whoami) || true
|
||||
# Testet Schreibrecht unter /home/llmadmin
|
||||
touch /home/llmadmin/.write_test && rm /home/llmadmin/.write_test || echo "no write"
|
||||
install -d "$TARGET_BASE"
|
||||
|
||||
- name: Ensure target directories exist
|
||||
- name: Deploy whitelisted directories
|
||||
run: |
|
||||
mkdir -p /home/llmadmin/knowledge
|
||||
mkdir -p /home/llmadmin/llm-api
|
||||
mkdir -p /home/llmadmin/scripts
|
||||
set -euo pipefail
|
||||
IFS=' ' read -r -a DIRS <<< "$DEPLOY_DIRS"
|
||||
|
||||
- name: Rsync knowledge/
|
||||
if: ${{ hashFiles('knowledge/**') != '' }}
|
||||
run: |
|
||||
rsync -a --delete --exclude='.git' knowledge/ /home/llmadmin/knowledge/
|
||||
echo "Synced knowledge/"
|
||||
|
||||
- name: Rsync llm-api/
|
||||
if: ${{ hashFiles('llm-api/**') != '' }}
|
||||
run: |
|
||||
rsync -a --delete --exclude='.git' llm-api/ /home/llmadmin/llm-api/
|
||||
echo "Synced llm-api/"
|
||||
|
||||
- name: Rsync scripts/
|
||||
if: ${{ hashFiles('scripts/**') != '' }}
|
||||
run: |
|
||||
rsync -a --delete --exclude='.git' scripts/ /home/llmadmin/scripts/
|
||||
echo "Synced scripts/"
|
||||
for d in "${DIRS[@]}"; do
|
||||
if [ -d "$d" ]; then
|
||||
# Nur wenn im Ordner auch Dateien/Unterordner liegen
|
||||
if [ -n "$(find "$d" -mindepth 1 -print -quit)" ]; then
|
||||
echo ">> Syncing $d -> $TARGET_BASE/$d"
|
||||
install -d "$TARGET_BASE/$d"
|
||||
rsync -a --delete \
|
||||
--exclude='.git' \
|
||||
--exclude='.env' --exclude='.env.*' --exclude='**/.env' --exclude='**/.env.*' \
|
||||
"$d"/ "$TARGET_BASE/$d"/
|
||||
else
|
||||
echo ">> Skipping $d (leer)"
|
||||
fi
|
||||
else
|
||||
echo ">> Skipping $d (existiert nicht im Repo)"
|
||||
fi
|
||||
done
|
||||
|
||||
- name: Optional — systemctl --user restart llm-api (ignore if missing)
|
||||
continue-on-error: true
|
||||
env:
|
||||
XDG_RUNTIME_DIR: /run/user/${{ inputs.uid || 1000 }}
|
||||
run: |
|
||||
# Versuche nur zu restarten, wenn der Service existiert
|
||||
if systemctl --user list-unit-files | grep -q '^llm-api.service'; then
|
||||
systemctl --user restart llm-api.service
|
||||
systemctl --user --no-pager status llm-api.service --full -l || true
|
||||
|
|
@ -71,9 +64,9 @@ jobs:
|
|||
echo "llm-api.service nicht gefunden — Schritt wird übersprungen."
|
||||
fi
|
||||
|
||||
- name: Post-check — show latest changes
|
||||
- name: Post-check — list targets
|
||||
run: |
|
||||
echo "Deploy complete. Listing targets:"
|
||||
ls -la /home/llmadmin/knowledge | tail -n +1 || true
|
||||
ls -la /home/llmadmin/llm-api | tail -n +1 || true
|
||||
ls -la /home/llmadmin/scripts | tail -n +1 || true
|
||||
for d in $DEPLOY_DIRS; do
|
||||
echo "== $TARGET_BASE/$d =="
|
||||
ls -la "$TARGET_BASE/$d" 2>/dev/null | tail -n +1 || true
|
||||
done
|
||||
|
|
|
|||
90
PMO/WP-17-kickoff.md
Normal file
90
PMO/WP-17-kickoff.md
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
# WP-17 – Retriever & Composer (Kern ohne LLM)
|
||||
|
||||
## Projektkontext
|
||||
Wir entwickeln eine deterministische Planerstellung aus bestehenden **plan_templates** und **exercises**.
|
||||
WP-15 hat die Collections, Indizes und CRUD-APIs für `plan_templates` und `plans` produktiv geliefert.
|
||||
WP-02 stellt die exercises-Collection mit Capabilities und Qdrant-Anbindung bereit.
|
||||
|
||||
**Technologie-Stack:** Python 3.12, FastAPI, Qdrant
|
||||
|
||||
---
|
||||
|
||||
## Ziele
|
||||
Implementierung eines `/plan/generate`-Endpoints, der:
|
||||
|
||||
- Filter- und Vektor-Suche in Qdrant kombiniert
|
||||
- Scoring nach Coverage, Diversity und Novelty durchführt
|
||||
- Pläne deterministisch und ohne LLM generiert
|
||||
- Zeitbudgets einhält und Wiederholungen (Novelty-Penalty) vermeidet
|
||||
|
||||
---
|
||||
|
||||
## Deliverables
|
||||
1. **API**: POST `/plan/generate`
|
||||
- Parameter: `discipline`, `age_group`, `target_group`, `goals`, `time_budget_minutes`, `novelty_horizon` (5), `coverage_threshold` (0.8), `strict_mode`
|
||||
- Rückgabe: Plan-JSON mit Exercises-Referenzen und Metadaten
|
||||
|
||||
2. **Retriever**
|
||||
- Filter-Layer (Payload)
|
||||
- Vector-Layer (Ranking)
|
||||
- Kombinierte Gewichtung
|
||||
|
||||
3. **Composer**
|
||||
- Sections aufbauen (aus Template oder Default)
|
||||
- Zeitbudget pro Section und Gesamt einhalten
|
||||
- Strict-Mode: nur gültige `external_id`
|
||||
|
||||
4. **Scoring-Funktionen**
|
||||
- Coverage (Capabilites-Abdeckung)
|
||||
- Diversity (Variabilität)
|
||||
- Novelty (Neuheit gegenüber Historie)
|
||||
|
||||
5. **Tests**
|
||||
- Unit-Tests (Scoring, Filter)
|
||||
- E2E: Template → Retriever → Composer → Persistenz
|
||||
|
||||
6. **Dokumentation**
|
||||
- OpenAPI-Beispiele, Parametrierung, Konfigurationsoptionen
|
||||
|
||||
---
|
||||
|
||||
## Akzeptanzkriterien
|
||||
- Identische Eingaben → identischer Plan (Determinismus)
|
||||
- Keine doppelten Übungen im Plan
|
||||
- Budget- und Coverage-Ziele in ≥95 % der Testfälle erreicht
|
||||
- Novelty-Penalty wirkt wie konfiguriert
|
||||
|
||||
---
|
||||
|
||||
## Risiken
|
||||
- Konflikte zwischen Budget, Coverage, Novelty (Priorisierung erforderlich)
|
||||
- Geringe Übungsvielfalt → eingeschränkte Ergebnisse
|
||||
- Performance-Einbußen bei großen Collections
|
||||
|
||||
---
|
||||
|
||||
## Technische Vorgaben
|
||||
**Voreinstellungen:**
|
||||
- `novelty_horizon`: 5
|
||||
- `coverage_threshold`: 0.8
|
||||
- Priorität bei Konflikt: 1. Budget, 2. Coverage, 3. Novelty
|
||||
|
||||
**Benötigte Dateien:**
|
||||
- `llm-api/plan_router.py` (v0.13.4)
|
||||
- `llm-api/exercise_router.py` (aus WP-02)
|
||||
- `scripts/bootstrap_qdrant_plans.py` (v1.3.x)
|
||||
- Schema-Definitionen für `plan_templates` und `plans`
|
||||
- Beispiel-Datensätze (Golden-Cases)
|
||||
- `.env` (ohne Secrets, mit API-URLs)
|
||||
|
||||
---
|
||||
|
||||
## Prompt für das Entwicklerteam (direkt nutzbar)
|
||||
> **Rolle:** Entwicklerteam WP-17 – Retriever & Composer (Kern ohne LLM)
|
||||
> **Aufgabe:** Implementiere `/plan/generate`, der deterministisch aus plan_templates und exercises Pläne generiert.
|
||||
> Nutze Filter- und Vektor-Suche in Qdrant, Scoring-Funktionen (Coverage, Diversity, Novelty) und eine Composer-Logik, die Zeitbudgets einhält.
|
||||
> **Parameter:** discipline, age_group, target_group, goals, time_budget_minutes, novelty_horizon=5, coverage_threshold=0.8, strict_mode.
|
||||
> **Anforderungen:** Deterministische Ergebnisse, keine Duplikate, ≥95 % Zielerreichung bei Budget/Coverage, funktionierender Novelty-Penalty.
|
||||
> **Rahmen:** Python 3.12, FastAPI, Qdrant, vorhandene plan_templates/plans/exercises-Collections.
|
||||
> **Liefere:** Code, Unit- und E2E-Tests, OpenAPI-Doku mit Beispielen.
|
||||
> **Dateien:** siehe Liste oben.
|
||||
77
llm-api/.env.example
Normal file
77
llm-api/.env.example
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
# ======================
|
||||
# Laufzeit / Server
|
||||
# ======================
|
||||
UVICORN_HOST=0.0.0.0
|
||||
UVICORN_PORT=8000
|
||||
LOG_LEVEL=INFO
|
||||
|
||||
# ======================
|
||||
# Qdrant – Verbindung
|
||||
# ======================
|
||||
QDRANT_HOST=127.0.0.1
|
||||
QDRANT_PORT=6333
|
||||
QDRANT_URL=http://localhost:6333
|
||||
|
||||
# ======================
|
||||
# Collections – Namen
|
||||
# Hinweise:
|
||||
# - PLAN_COLLECTION wird von unseren neuen Routern verwendet.
|
||||
# - Einige ältere Komponenten nutzen ggf. *QDRANT_COLLECTION_PLANS*/*QDRANT_COLLECTION_EXERCISES*.
|
||||
# Belasse sie konsistent oder kommentiere sie aus, um Verwirrung zu vermeiden.
|
||||
# ======================
|
||||
PLAN_COLLECTION=plans
|
||||
PLAN_TEMPLATE_COLLECTION=plan_templates
|
||||
PLAN_SESSION_COLLECTION=plan_sessions
|
||||
EXERCISE_COLLECTION=exercises
|
||||
# Kompatibilität (optional, falls von Alt-Code gelesen):
|
||||
# QDRANT_COLLECTION_PLANS=training_plans
|
||||
# QDRANT_COLLECTION_EXERCISES=exercises
|
||||
|
||||
# ======================
|
||||
# Strict-Mode für /plan
|
||||
# 0 / leer = aus (Standard)
|
||||
# 1/true/...= an → jede exercise_external_id muss in EXERCISE_COLLECTION existieren, sonst 422
|
||||
# ======================
|
||||
PLAN_STRICT_EXERCISES=0
|
||||
|
||||
# ======================
|
||||
# Ollama (LLM) – lokal
|
||||
# ======================
|
||||
OLLAMA_URL=http://127.0.0.1:11434/api/generate
|
||||
OLLAMA_ENDPOINT=/api/generate
|
||||
OLLAMA_MODEL=mistral
|
||||
OLLAMA_TIMEOUT_SECONDS=120
|
||||
|
||||
# ======================
|
||||
# Embeddings
|
||||
# ======================
|
||||
EMBEDDING_MODEL=all-MiniLM-L6-v2
|
||||
EMBEDDING_DIM=384
|
||||
|
||||
# ======================
|
||||
# FastAPI / App Defaults
|
||||
# ======================
|
||||
DEFAULT_COLLECTION=default
|
||||
API_TITLE="KI Trainerassistent API"
|
||||
API_DESCRIPTION="Lokale API für Trainingsplanung (Karate, Gewaltschutz, etc.)"
|
||||
|
||||
# ======================
|
||||
# Wiki Importer
|
||||
# ======================
|
||||
API_BASE_URL=http://localhost:8000
|
||||
WIKI_BASE_URL=https://karatetrainer.net
|
||||
WIKI_API_URL=https://karatetrainer.net/api.php
|
||||
WIKI_BOT_USER=Bot
|
||||
WIKI_BOT_PASSWORD=***set_me***
|
||||
WIKI_SMW_LIMIT=500
|
||||
WIKI_SMW_OFFSET=0
|
||||
WIKI_TIMEOUT=15
|
||||
WIKI_BATCH=50
|
||||
WIKI_RETRIES=1
|
||||
WIKI_SLEEP_MS=0
|
||||
|
||||
# ======================
|
||||
# Test-/Hilfs-URLs (für pytest & Tools)
|
||||
# ======================
|
||||
BASE_URL=http://127.0.0.1:8000
|
||||
QDRANT_BASE=http://127.0.0.1:6333
|
||||
45
llm-api/audit_ki_stack.sh
Normal file
45
llm-api/audit_ki_stack.sh
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
echo "=== SYSTEM ==="
|
||||
uname -a || true
|
||||
echo
|
||||
echo "CPU/Mem:"
|
||||
lscpu | egrep 'Model name|CPU\(s\)|Thread|Core|Socket' || true
|
||||
free -h || true
|
||||
echo
|
||||
echo "Disk:"
|
||||
df -hT | awk 'NR==1 || /\/(srv|opt|home|var|$)/'
|
||||
echo
|
||||
|
||||
echo "=== DOCKER ==="
|
||||
docker --version || true
|
||||
docker compose version || docker-compose --version || true
|
||||
echo
|
||||
echo "Running containers:"
|
||||
docker ps --format "table {{.Names}}\t{{.Image}}\t{{.Ports}}" || true
|
||||
echo
|
||||
|
||||
echo "=== PYTHON ==="
|
||||
python3 --version || true
|
||||
python3.12 --version || true
|
||||
pip --version || true
|
||||
echo
|
||||
|
||||
echo "=== NODE/NPM (für n8n, falls nativ) ==="
|
||||
node -v || true
|
||||
npm -v || true
|
||||
echo
|
||||
|
||||
echo "=== BESETZTE PORTS (root zeigt Prozesse) ==="
|
||||
for p in 8000 6333 11434 5678; do
|
||||
echo "--- Port $p ---"
|
||||
(sudo ss -ltnp | grep ":$p ") || echo "frei"
|
||||
done
|
||||
echo
|
||||
|
||||
echo "=== DIENSTE / HINWEISE ==="
|
||||
systemctl list-units --type=service | egrep -i 'qdrant|ollama|n8n|uvicorn|gunicorn' || true
|
||||
echo
|
||||
|
||||
echo "Fertig. Prüfe ob Ports frei sind und welche Container bereits laufen."
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
# Kommentarzeile
|
||||
# Kommentarzeile zum Testen ob der Server die Änderung erkennt
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel, Field
|
||||
|
|
|
|||
|
|
@ -1,183 +1,582 @@
|
|||
# Test eines Kommentars, um die Funktion des gitea testen zu können
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
exercise_router.py – v1.7.1 (Swagger angereichert)
|
||||
|
||||
Ergänzt:
|
||||
- Aussagekräftige summary/description/response_description je Endpoint
|
||||
- Beispiele (x-codeSamples) für curl-Aufrufe
|
||||
- Pydantic-Felder mit description + json_schema_extra (Beispiele)
|
||||
- Keine API-Signatur-/Pfadänderungen, keine Prefix-Änderungen
|
||||
|
||||
Hinweis:
|
||||
- Endpunkte bleiben weiterhin unter /exercise/* (weil die Routenstrings bereits /exercise/... enthalten).
|
||||
- Falls du später einen APIRouter-Prefix setzen willst, dann bitte die Pfade unten von '/exercise/...' auf relative Pfade ändern,
|
||||
sonst entstehen Doppelpfade.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Optional, Dict, Any
|
||||
from uuid import uuid4
|
||||
from datetime import datetime, date
|
||||
from datetime import datetime
|
||||
from clients import model, qdrant
|
||||
from qdrant_client.models import PointStruct, VectorParams, Distance, PointIdsList
|
||||
from qdrant_client.models import (
|
||||
PointStruct,
|
||||
VectorParams,
|
||||
Distance,
|
||||
PointIdsList,
|
||||
Filter,
|
||||
FieldCondition,
|
||||
MatchValue,
|
||||
)
|
||||
import logging
|
||||
import os
|
||||
|
||||
router = APIRouter()
|
||||
logger = logging.getLogger("exercise_router")
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
# ---- Models ----
|
||||
# Router ohne prefix (Pfadstrings enthalten bereits '/exercise/...')
|
||||
router = APIRouter(tags=["exercise"])
|
||||
|
||||
# =========================
|
||||
# Models
|
||||
# =========================
|
||||
class Exercise(BaseModel):
|
||||
id: str = Field(default_factory=lambda: str(uuid4()))
|
||||
title: str
|
||||
summary: str
|
||||
short_description: str
|
||||
keywords: List[str] = []
|
||||
link: Optional[str] = None
|
||||
id: str = Field(default_factory=lambda: str(uuid4()), description="Interne UUID (Qdrant-Punkt-ID)")
|
||||
# Upsert-Metadaten
|
||||
external_id: Optional[str] = Field(default=None, description="Upsert-Schlüssel (z. B. 'mw:{pageid}')")
|
||||
fingerprint: Optional[str] = Field(default=None, description="sha256 der Kernfelder für Idempotenz/Diff")
|
||||
source: Optional[str] = Field(default=None, description="Quelle (z. B. 'mediawiki', 'pdf-import', …)")
|
||||
imported_at: Optional[datetime] = Field(default=None, description="Zeitpunkt des Imports (ISO-8601)")
|
||||
|
||||
discipline: str
|
||||
group: Optional[str] = None
|
||||
age_group: str
|
||||
target_group: str
|
||||
min_participants: int
|
||||
duration_minutes: int
|
||||
# Domain-Felder
|
||||
title: str = Field(..., description="Übungstitel")
|
||||
summary: str = Field(..., description="Kurzbeschreibung/Ziel der Übung")
|
||||
short_description: str = Field(..., description="Alternative Kurzform / Teaser")
|
||||
keywords: List[str] = Field(default_factory=list, description="Freie Schlagworte (normalisiert)")
|
||||
link: Optional[str] = Field(default=None, description="Kanonsiche URL/Permalink zur Quelle")
|
||||
discipline: str = Field(..., description="Disziplin (z. B. Karate)")
|
||||
group: Optional[str] = Field(default=None, description="Optionale Gruppierung/Kategorie")
|
||||
age_group: str = Field(..., description="Altersgruppe (z. B. Kinder/Schüler/Teenager/Erwachsene)")
|
||||
target_group: str = Field(..., description="Zielgruppe (z. B. Breitensportler)")
|
||||
min_participants: int = Field(..., ge=0, description="Minimale Gruppenstärke")
|
||||
duration_minutes: int = Field(..., ge=0, description="Dauer in Minuten")
|
||||
capabilities: Dict[str, int] = Field(default_factory=dict, description="Fähigkeiten-Map: {Name: Level 1..5}")
|
||||
category: str = Field(..., description="Abschnitt / Kategorie (z. B. Aufwärmen, Grundschule, …)")
|
||||
purpose: str = Field(..., description="Zweck/Zielabsicht")
|
||||
execution: str = Field(..., description="Durchführungsschritte (Markdown/Wiki-ähnlich)")
|
||||
notes: str = Field(..., description="Hinweise/Coaching-Cues")
|
||||
preparation: str = Field(..., description="Vorbereitung/Material")
|
||||
method: str = Field(..., description="Methodik/Didaktik")
|
||||
equipment: List[str] = Field(default_factory=list, description="Benötigte Hilfsmittel")
|
||||
|
||||
capabilities: Dict[str, int] = {}
|
||||
category: str
|
||||
|
||||
purpose: str
|
||||
execution: str
|
||||
notes: str
|
||||
preparation: str
|
||||
method: str
|
||||
equipment: List[str] = []
|
||||
|
||||
class PhaseExercise(BaseModel):
|
||||
exercise_id: str
|
||||
cond_load: Dict[str, Any] = {}
|
||||
coord_load: Dict[str, Any] = {}
|
||||
instructions: str
|
||||
|
||||
class PlanPhase(BaseModel):
|
||||
name: str
|
||||
duration_minutes: int
|
||||
method: str
|
||||
method_notes: str
|
||||
exercises: List[PhaseExercise]
|
||||
|
||||
class TrainingPlan(BaseModel):
|
||||
id: str = Field(default_factory=lambda: str(uuid4()))
|
||||
title: str
|
||||
short_description: str
|
||||
collection: str
|
||||
discipline: str
|
||||
group: Optional[str] = None
|
||||
dojo: str
|
||||
date: date
|
||||
plan_duration_weeks: int
|
||||
focus_areas: List[str] = []
|
||||
predecessor_plan_id: Optional[str] = None
|
||||
age_group: str
|
||||
created_at: datetime = Field(default_factory=datetime.utcnow)
|
||||
phases: List[PlanPhase]
|
||||
model_config = {
|
||||
"json_schema_extra": {
|
||||
"example": {
|
||||
"external_id": "mw:218",
|
||||
"title": "Affenklatschen",
|
||||
"summary": "Koordination & Aufmerksamkeit mit Ballwechseln",
|
||||
"short_description": "Ballgewöhnung im Stand/Gehen/Laufen",
|
||||
"keywords": ["Hand-Auge-Koordination", "Reaktion"],
|
||||
"link": "https://www.karatetrainer.de/index.php?title=Affenklatschen",
|
||||
"discipline": "Karate",
|
||||
"age_group": "Teenager",
|
||||
"target_group": "Breitensportler",
|
||||
"min_participants": 4,
|
||||
"duration_minutes": 8,
|
||||
"capabilities": {"Reaktionsfähigkeit": 2, "Kopplungsfähigkeit": 2},
|
||||
"category": "Aufwärmen",
|
||||
"purpose": "Aufmerksamkeit & Reaktionskette aktivieren",
|
||||
"execution": "* Paarweise aufstellen …",
|
||||
"notes": "* nicht zu lange werden lassen",
|
||||
"preparation": "* Bälle bereit halten",
|
||||
"method": "* klare Regeln/Strafrunde",
|
||||
"equipment": ["Bälle"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class DeleteResponse(BaseModel):
|
||||
status: str
|
||||
count: int
|
||||
collection: str
|
||||
source: Optional[str] = None
|
||||
type: Optional[str] = None
|
||||
status: str = Field(..., description="Statusmeldung")
|
||||
count: int = Field(..., ge=0, description="Anzahl betroffener Punkte")
|
||||
collection: str = Field(..., description="Qdrant-Collection-Name")
|
||||
|
||||
# ---- CRUD Endpoints for Exercise ----
|
||||
@router.post("/exercise", response_model=Exercise)
|
||||
def create_exercise(ex: Exercise):
|
||||
# Ensure Exercise collection exists
|
||||
if not qdrant.collection_exists("exercises"):
|
||||
class ExerciseSearchRequest(BaseModel):
|
||||
# Optionaler Semantik-Query (Vektor)
|
||||
query: Optional[str] = Field(default=None, description="Freitext für Vektor-Suche (optional)")
|
||||
limit: int = Field(default=20, ge=1, le=200, description="Max. Treffer")
|
||||
offset: int = Field(default=0, ge=0, description="Offset/Pagination")
|
||||
|
||||
# Einfache Filter
|
||||
discipline: Optional[str] = Field(default=None, description="z. B. Karate")
|
||||
target_group: Optional[str] = Field(default=None, description="z. B. Breitensportler")
|
||||
age_group: Optional[str] = Field(default=None, description="z. B. Teenager")
|
||||
max_duration: Optional[int] = Field(default=None, ge=0, description="Obergrenze Minuten")
|
||||
|
||||
# Listen-Filter
|
||||
equipment_any: Optional[List[str]] = Field(default=None, description="Mind. eines muss passen")
|
||||
equipment_all: Optional[List[str]] = Field(default=None, description="Alle müssen passen")
|
||||
keywords_any: Optional[List[str]] = Field(default=None, description="Mind. eines muss passen")
|
||||
keywords_all: Optional[List[str]] = Field(default=None, description="Alle müssen passen")
|
||||
|
||||
# Capabilities (Namen + Level-Operator)
|
||||
capability_names: Optional[List[str]] = Field(default=None, description="Capability-Bezeichnungen")
|
||||
capability_ge_level: Optional[int] = Field(default=None, ge=1, le=5, description="Level ≥ N")
|
||||
capability_eq_level: Optional[int] = Field(default=None, ge=1, le=5, description="Level == N")
|
||||
|
||||
model_config = {
|
||||
"json_schema_extra": {
|
||||
"examples": [{
|
||||
"discipline": "Karate",
|
||||
"max_duration": 12,
|
||||
"equipment_any": ["Bälle"],
|
||||
"capability_names": ["Reaktionsfähigkeit"],
|
||||
"capability_ge_level": 2,
|
||||
"limit": 5
|
||||
}, {
|
||||
"query": "Aufwärmen Reaktionsfähigkeit 10min Teenager Bälle",
|
||||
"discipline": "Karate",
|
||||
"limit": 3
|
||||
}]
|
||||
}
|
||||
}
|
||||
|
||||
class ExerciseSearchHit(BaseModel):
|
||||
id: str = Field(..., description="Qdrant-Punkt-ID")
|
||||
score: Optional[float] = Field(default=None, description="Ähnlichkeitsscore (nur bei Vektor-Suche)")
|
||||
payload: Exercise = Field(..., description="Übungsdaten (Payload)")
|
||||
|
||||
class ExerciseSearchResponse(BaseModel):
|
||||
hits: List[ExerciseSearchHit] = Field(..., description="Trefferliste")
|
||||
|
||||
model_config = {
|
||||
"json_schema_extra": {
|
||||
"example": {
|
||||
"hits": [{
|
||||
"id": "c1f1-…",
|
||||
"score": 0.78,
|
||||
"payload": Exercise.model_config["json_schema_extra"]["example"]
|
||||
}]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# =========================
|
||||
# Helpers
|
||||
# =========================
|
||||
COLLECTION = os.getenv("EXERCISE_COLLECTION", "exercises")
|
||||
|
||||
|
||||
def _ensure_collection():
|
||||
if not qdrant.collection_exists(COLLECTION):
|
||||
qdrant.recreate_collection(
|
||||
collection_name="exercises",
|
||||
collection_name=COLLECTION,
|
||||
vectors_config=VectorParams(
|
||||
size=model.get_sentence_embedding_dimension(),
|
||||
distance=Distance.COSINE
|
||||
)
|
||||
distance=Distance.COSINE,
|
||||
),
|
||||
)
|
||||
vec = model.encode(f"{ex.title}. {ex.summary}").tolist()
|
||||
point = PointStruct(id=ex.id, vector=vec, payload=ex.dict())
|
||||
qdrant.upsert(collection_name="exercises", points=[point])
|
||||
return ex
|
||||
|
||||
@router.get("/exercise", response_model=List[Exercise])
|
||||
def list_exercises(
|
||||
discipline: Optional[str] = Query(None),
|
||||
group: Optional[str] = Query(None),
|
||||
tags: Optional[str] = Query(None)
|
||||
):
|
||||
filters = []
|
||||
if discipline:
|
||||
filters.append({"key": "discipline", "match": {"value": discipline}})
|
||||
if group:
|
||||
filters.append({"key": "group", "match": {"value": group}})
|
||||
if tags:
|
||||
for t in tags.split(","):
|
||||
filters.append({"key": "keywords", "match": {"value": t.strip()}})
|
||||
|
||||
def _lookup_by_external_id(external_id: str) -> Optional[Dict[str, Any]]:
|
||||
_ensure_collection()
|
||||
flt = Filter(must=[FieldCondition(key="external_id", match=MatchValue(value=external_id))])
|
||||
pts, _ = qdrant.scroll(
|
||||
collection_name="exercises",
|
||||
scroll_filter={"must": filters} if filters else None,
|
||||
limit=10000
|
||||
collection_name=COLLECTION,
|
||||
scroll_filter=flt,
|
||||
limit=1,
|
||||
with_payload=True,
|
||||
)
|
||||
return [Exercise(**pt.payload) for pt in pts]
|
||||
if not pts:
|
||||
return None
|
||||
doc = dict(pts[0].payload or {})
|
||||
doc.setdefault("id", str(pts[0].id))
|
||||
return doc
|
||||
|
||||
# ---- CRUD Endpoints for TrainingPlan ----
|
||||
@router.post("/plan", response_model=TrainingPlan)
|
||||
def create_plan(plan: TrainingPlan):
|
||||
# Ensure TrainingPlan collection exists
|
||||
if not qdrant.collection_exists("training_plans"):
|
||||
qdrant.recreate_collection(
|
||||
collection_name="training_plans",
|
||||
vectors_config=VectorParams(
|
||||
size=model.get_sentence_embedding_dimension(),
|
||||
distance=Distance.COSINE
|
||||
)
|
||||
|
||||
_DEF_EMBED_FIELDS = ("title", "summary", "short_description", "purpose", "execution", "notes")
|
||||
|
||||
|
||||
def _make_vector_from_exercise(ex: Exercise) -> List[float]:
|
||||
text = ". ".join([getattr(ex, f, "") for f in _DEF_EMBED_FIELDS if getattr(ex, f, None)])
|
||||
return model.encode(text).tolist()
|
||||
|
||||
|
||||
def _make_vector_from_query(query: str) -> List[float]:
|
||||
return model.encode(query).tolist()
|
||||
|
||||
|
||||
def _norm_list(xs: List[Any]) -> List[str]:
|
||||
out = []
|
||||
seen = set()
|
||||
for x in xs or []:
|
||||
s = str(x).strip()
|
||||
if not s:
|
||||
continue
|
||||
key = s.casefold()
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
out.append(s)
|
||||
return sorted(out, key=str.casefold)
|
||||
|
||||
|
||||
def _facet_capabilities(caps: Dict[str, Any]) -> Dict[str, List[str]]:
|
||||
"""
|
||||
Leitet Facettenfelder aus der capabilities-Map ab:
|
||||
- capability_keys: alle Namen
|
||||
- capability_geN: Namen mit Level >= N (1..5)
|
||||
- capability_eqN: Namen mit Level == N (1..5)
|
||||
"""
|
||||
caps = caps or {}
|
||||
|
||||
def names_where(pred) -> List[str]:
|
||||
out = []
|
||||
for k, v in caps.items():
|
||||
try:
|
||||
iv = int(v)
|
||||
except Exception:
|
||||
iv = 0
|
||||
if pred(iv):
|
||||
t = str(k).strip()
|
||||
if t:
|
||||
out.append(t)
|
||||
return sorted({t for t in out}, key=str.casefold)
|
||||
|
||||
all_keys = sorted({str(k).strip() for k in caps.keys() if str(k).strip()}, key=str.casefold)
|
||||
return {
|
||||
"capability_keys": all_keys,
|
||||
# >= N
|
||||
"capability_ge1": names_where(lambda lv: lv >= 1),
|
||||
"capability_ge2": names_where(lambda lv: lv >= 2),
|
||||
"capability_ge3": names_where(lambda lv: lv >= 3),
|
||||
"capability_ge4": names_where(lambda lv: lv >= 4),
|
||||
"capability_ge5": names_where(lambda lv: lv >= 5),
|
||||
# == N
|
||||
"capability_eq1": names_where(lambda lv: lv == 1),
|
||||
"capability_eq2": names_where(lambda lv: lv == 2),
|
||||
"capability_eq3": names_where(lambda lv: lv == 3),
|
||||
"capability_eq4": names_where(lambda lv: lv == 4),
|
||||
"capability_eq5": names_where(lambda lv: lv == 5),
|
||||
}
|
||||
|
||||
|
||||
def _response_strip_extras(payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
# Nur definierte Exercise-Felder zurückgeben (saubere API)
|
||||
allowed = set(Exercise.model_fields.keys())
|
||||
return {k: v for k, v in payload.items() if k in allowed}
|
||||
|
||||
|
||||
def _build_filter(req: ExerciseSearchRequest) -> Filter:
|
||||
must: List[Any] = []
|
||||
should: List[Any] = []
|
||||
|
||||
if req.discipline:
|
||||
must.append(FieldCondition(key="discipline", match=MatchValue(value=req.discipline)))
|
||||
if req.target_group:
|
||||
must.append(FieldCondition(key="target_group", match=MatchValue(value=req.target_group)))
|
||||
if req.age_group:
|
||||
must.append(FieldCondition(key="age_group", match=MatchValue(value=req.age_group)))
|
||||
if req.max_duration is not None:
|
||||
# Range in Qdrant: über rohen JSON-Range-Ausdruck (Client-Modell hat keinen Komfort-Wrapper)
|
||||
must.append({"key": "duration_minutes", "range": {"lte": int(req.max_duration)}})
|
||||
|
||||
# equipment
|
||||
if req.equipment_all:
|
||||
for it in req.equipment_all:
|
||||
must.append(FieldCondition(key="equipment", match=MatchValue(value=it)))
|
||||
if req.equipment_any:
|
||||
for it in req.equipment_any:
|
||||
should.append(FieldCondition(key="equipment", match=MatchValue(value=it)))
|
||||
|
||||
# keywords
|
||||
if req.keywords_all:
|
||||
for it in req.keywords_all:
|
||||
must.append(FieldCondition(key="keywords", match=MatchValue(value=it)))
|
||||
if req.keywords_any:
|
||||
for it in req.keywords_any:
|
||||
should.append(FieldCondition(key="keywords", match=MatchValue(value=it)))
|
||||
|
||||
# capabilities (ge/eq)
|
||||
if req.capability_names:
|
||||
names = [s for s in req.capability_names if s and s.strip()]
|
||||
if req.capability_eq_level:
|
||||
key = f"capability_eq{int(req.capability_eq_level)}"
|
||||
for n in names:
|
||||
must.append(FieldCondition(key=key, match=MatchValue(value=n)))
|
||||
elif req.capability_ge_level:
|
||||
key = f"capability_ge{int(req.capability_ge_level)}"
|
||||
for n in names:
|
||||
must.append(FieldCondition(key=key, match=MatchValue(value=n)))
|
||||
else:
|
||||
# Default: Level >=1 (alle vorhanden)
|
||||
for n in names:
|
||||
must.append(FieldCondition(key="capability_ge1", match=MatchValue(value=n)))
|
||||
|
||||
flt = Filter(must=must)
|
||||
if should:
|
||||
# Qdrant: 'should' entspricht OR mit minimum_should_match=1
|
||||
flt.should = should
|
||||
return flt
|
||||
|
||||
# =========================
# Endpoints
# =========================
@router.get(
    "/exercise/by-external-id",
    summary="Übung per external_id abrufen",
    description=(
        "Liefert die Übung mit der gegebenen `external_id` (z. B. `mw:{pageid}`). "
        "Verwendet einen Qdrant-Filter auf dem Payload-Feld `external_id`."
    ),
    response_description="Vollständiger Exercise-Payload oder 404 bei Nichtfund.",
    openapi_extra={
        "x-codeSamples": [{
            "lang": "bash",
            "label": "curl",
            "source": "curl -s 'http://localhost:8000/exercise/by-external-id?external_id=mw:218' | jq ."
        }]
    }
)
def get_exercise_by_external_id(external_id: str = Query(..., min_length=3, description="Upsert-Schlüssel, z. B. 'mw:218'")):
    """Return the exercise stored under the given upsert key, or 404 when absent."""
    record = _lookup_by_external_id(external_id)
    if not record:
        raise HTTPException(status_code=404, detail="not found")
    return record
|
||||
|
||||
|
||||
@router.post(
    "/exercise",
    response_model=Exercise,
    summary="Create/Update (idempotent per external_id)",
    description=(
        "Legt eine Übung an oder aktualisiert sie. Wenn `external_id` vorhanden und bereits in der Collection existiert, "
        "wird **Update** auf dem bestehenden Punkt ausgeführt (Upsert). `keywords`/`equipment` werden normalisiert, "
        "Capability-Facetten (`capability_ge1..5`, `capability_eq1..5`, `capability_keys`) automatisch abgeleitet. "
        "Der Vektor wird aus Kernfeldern (title/summary/short_description/purpose/execution/notes) berechnet."
    ),
    response_description="Gespeicherter Exercise-Datensatz (Payload-View).",
    openapi_extra={
        "x-codeSamples": [{
            "lang": "bash",
            "label": "curl",
            "source": "curl -s -X POST http://localhost:8000/exercise -H 'Content-Type: application/json' -d @exercise.json | jq ."
        }]
    }
)
def create_or_update_exercise(ex: Exercise):
    """Upsert an exercise; reuses the stored point id when `external_id` already exists."""
    _ensure_collection()

    # Resolve the target point id: keep the existing point's id for a known
    # external_id so repeated POSTs update in place instead of duplicating.
    point_id = ex.id
    if ex.external_id:
        prior = _lookup_by_external_id(ex.external_id)
        if prior:
            point_id = prior.get("id", point_id)

    vector = _make_vector_from_exercise(ex)

    payload: Dict[str, Any] = ex.model_dump()
    payload["id"] = str(point_id)
    # Normalize list fields before storing (trim/dedupe via _norm_list).
    payload["keywords"] = _norm_list(payload.get("keywords") or [])
    payload["equipment"] = _norm_list(payload.get("equipment") or [])

    # Derive capability facet fields (capability_ge*/eq*/keys) used by search filters.
    payload.update(_facet_capabilities(payload.get("capabilities") or {}))

    qdrant.upsert(
        collection_name=COLLECTION,
        points=[PointStruct(id=str(point_id), vector=vector, payload=payload)],
    )

    return Exercise(**_response_strip_extras(payload))
|
||||
|
||||
|
||||
@router.get(
    "/exercise/{exercise_id}",
    response_model=Exercise,
    summary="Übung per interner ID (Qdrant-Punkt-ID) lesen",
    description="Scrollt nach `id` und gibt den Payload als Exercise zurück.",
    response_description="Exercise-Payload oder 404 bei Nichtfund.",
    openapi_extra={
        "x-codeSamples": [{
            "lang": "bash",
            "label": "curl",
            "source": "curl -s 'http://localhost:8000/exercise/1234-uuid' | jq ."
        }]
    }
)
def get_exercise(exercise_id: str):
    """Read one exercise via a scroll on the payload field `id`; 404 when missing."""
    _ensure_collection()
    id_condition = FieldCondition(key="id", match=MatchValue(value=exercise_id))
    points, _ = qdrant.scroll(
        collection_name=COLLECTION,
        scroll_filter=Filter(must=[id_condition]),
        limit=1,
        with_payload=True,
    )
    if not points:
        raise HTTPException(status_code=404, detail="not found")
    first = points[0]
    data = dict(first.payload or {})
    data.setdefault("id", str(first.id))
    return Exercise(**_response_strip_extras(data))
|
||||
|
||||
|
||||
@router.post(
    "/exercise/search",
    response_model=ExerciseSearchResponse,
    summary="Suche Übungen (Filter + optional Vektor)",
    description=(
        "Kombinierbare Filter auf Payload-Feldern (`discipline`, `age_group`, `target_group`, `equipment`, `keywords`, "
        "`capability_geN/eqN`) und **optional** Vektor-Suche via `query`. "
        "`should`-Filter (equipment_any/keywords_any) wirken als OR (minimum_should_match=1). "
        "`max_duration` wird als Range (lte) angewandt. Ergebnis enthält bei Vektor-Suche `score`, sonst `null`."
    ),
    response_description="Trefferliste (payload + Score bei Vektor-Suche).",
    openapi_extra={
        "x-codeSamples": [
            {
                "lang": "bash",
                "label": "Filter",
                "source": "curl -s -X POST http://localhost:8000/exercise/search -H 'Content-Type: application/json' -d '{\"discipline\":\"Karate\",\"max_duration\":12,\"equipment_any\":[\"Bälle\"],\"capability_names\":[\"Reaktionsfähigkeit\"],\"capability_ge_level\":2,\"limit\":5}' | jq ."
            },
            {
                "lang": "bash",
                "label": "Vektor + Filter",
                "source": "curl -s -X POST http://localhost:8000/exercise/search -H 'Content-Type: application/json' -d '{\"query\":\"Aufwärmen 10min Teenager Bälle\",\"discipline\":\"Karate\",\"limit\":3}' | jq ."
            }
        ]
    }
)
def search_exercises(req: ExerciseSearchRequest) -> ExerciseSearchResponse:
    """Search exercises by payload filters and (optionally) vector similarity.

    With `req.query` set: vector search with the filter applied, hits carry a score.
    Without it: filter-only scroll pagination, score is None.
    """
    _ensure_collection()
    flt = _build_filter(req)

    hits: List[ExerciseSearchHit] = []
    if req.query:
        vec = _make_vector_from_query(req.query)
        res = qdrant.search(
            collection_name=COLLECTION,
            query_vector=vec,
            limit=req.limit,
            offset=req.offset,
            query_filter=flt,
        )
        # BUG FIX: a stray block pasted here upserted into "training_plans" using an
        # undefined `plan` variable and returned early (NameError at runtime, and the
        # vector hits were never collected). The stray lines were removed.
        for h in res:
            payload = dict(h.payload or {})
            payload.setdefault("id", str(h.id))
            hits.append(ExerciseSearchHit(id=str(h.id), score=float(h.score or 0.0), payload=Exercise(**_response_strip_extras(payload))))
    else:
        # Filter-only: scroll pagination, score=None.
        collected = 0
        skipped = 0
        next_offset = None
        while collected < req.limit:
            page, next_offset = qdrant.scroll(
                collection_name=COLLECTION,
                scroll_filter=flt,
                offset=next_offset,
                limit=max(1, min(256, req.limit - collected + req.offset - skipped)),
                with_payload=True,
            )
            if not page:
                break
            for pt in page:
                # Emulate `offset` by skipping the first req.offset matches.
                if skipped < req.offset:
                    skipped += 1
                    continue
                payload = dict(pt.payload or {})
                payload.setdefault("id", str(pt.id))
                hits.append(ExerciseSearchHit(id=str(pt.id), score=None, payload=Exercise(**_response_strip_extras(payload))))
                collected += 1
                if collected >= req.limit:
                    break
            if next_offset is None:
                break
    return ExerciseSearchResponse(hits=hits)
|
||||
|
||||
@router.get("/plan", response_model=List[TrainingPlan])
def list_plans(
    collection: str = Query("training_plans"),
    discipline: Optional[str] = Query(None),
    group: Optional[str] = Query(None),
    dojo: Optional[str] = Query(None)
):
    """List stored training plans; filtering happens client-side after a bulk scroll."""
    if not qdrant.collection_exists(collection):
        return []
    points, _ = qdrant.scroll(collection_name=collection, limit=10000)
    matches = []
    for point in points:
        plan = TrainingPlan(**point.payload)
        keep = (
            (not discipline or plan.discipline == discipline)
            and (not group or plan.group == group)
            and (not dojo or plan.dojo == dojo)
        )
        if keep:
            matches.append(plan)
    return matches
|
||||
|
||||
# ---- Delete Endpoints ----
@router.delete("/delete-source", response_model=DeleteResponse)
def delete_by_source(
    collection: str = Query(...),
    source: Optional[str] = Query(None),
    type: Optional[str] = Query(None)
):
    """Delete all points in `collection` whose payload matches `source` and/or `type`.

    At least one of the two filter parameters is required (400 otherwise);
    404 when the collection does not exist.
    """
    if not qdrant.collection_exists(collection):
        raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
    filt = []
    if source:
        filt.append({"key": "source", "match": {"value": source}})
    if type:
        filt.append({"key": "type", "match": {"value": type}})
    if not filt:
        raise HTTPException(status_code=400, detail="Mindestens ein Filterparameter muss angegeben werden.")
    pts, _ = qdrant.scroll(collection_name=collection, scroll_filter={"must": filt}, limit=10000)
    # BUG FIX: the function previously ended after the scroll — nothing was deleted
    # and no response was returned. Completed with the delete + response tail used
    # by the other delete endpoints.
    ids = [str(p.id) for p in pts]
    if not ids:
        return DeleteResponse(status="🔍 Keine Einträge gefunden.", count=0, collection=collection)
    qdrant.delete(collection_name=collection, points_selector=PointIdsList(points=ids))
    return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=collection)
|
||||
@router.delete(
    "/exercise/delete-by-external-id",
    response_model=DeleteResponse,
    summary="Löscht Punkte mit gegebener external_id",
    description=(
        "Scrollt nach `external_id` und löscht alle passenden Punkte. "
        "Idempotent: wenn nichts gefunden → count=0. Vorsicht: **löscht dauerhaft**."
    ),
    response_description="Status + Anzahl gelöschter Punkte.",
    openapi_extra={
        "x-codeSamples": [{
            "lang": "bash",
            "label": "curl",
            "source": "curl -s 'http://localhost:8000/exercise/delete-by-external-id?external_id=mw:9999' | jq ."
        }]
    }
)
def delete_by_external_id(external_id: str = Query(..., description="Upsert-Schlüssel, z. B. 'mw:218'")):
    """Delete every point whose payload `external_id` matches. Idempotent (count=0 when absent)."""
    _ensure_collection()
    flt = Filter(must=[FieldCondition(key="external_id", match=MatchValue(value=external_id))])
    pts, _ = qdrant.scroll(collection_name=COLLECTION, scroll_filter=flt, limit=10000, with_payload=False)
    ids = [str(p.id) for p in pts]
    if not ids:
        # BUG FIX: this function referenced an undefined local `collection`
        # (leftover from delete_by_source) and carried dead duplicate return/delete
        # lines after its final return. Uses the module-level COLLECTION throughout.
        return DeleteResponse(status="🔍 Keine Einträge gefunden.", count=0, collection=COLLECTION)
    qdrant.delete(collection_name=COLLECTION, points_selector=PointIdsList(points=ids))
    return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=COLLECTION)
|
||||
|
||||
# BUG FIX: a truncated duplicate `delete_collection` stub (path "/delete-collection",
# decorator + signature but no function body → SyntaxError, and a duplicate function
# name) was removed; the documented endpoint below is the single definition.
@router.delete(
    "/exercise/delete-collection",
    response_model=DeleteResponse,
    summary="Collection komplett löschen",
    description=(
        "Entfernt die gesamte Collection aus Qdrant. **Gefährlich** – alle Übungen sind danach weg. "
        "Nutze nur in Testumgebungen oder für einen kompletten Neuaufbau."
    ),
    response_description="Status. count=0 (nicht relevant beim Drop).",
    openapi_extra={
        "x-codeSamples": [{
            "lang": "bash",
            "label": "curl",
            "source": "curl -s 'http://localhost:8000/exercise/delete-collection?collection=exercises' | jq ."
        }]
    }
)
def delete_collection(collection: str = Query(default=COLLECTION, description="Collection-Name (Default: 'exercises')")):
    """Drop the whole collection (destructive); 404 when it does not exist."""
    if not qdrant.collection_exists(collection):
        raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
    qdrant.delete_collection(collection_name=collection)
    return DeleteResponse(status="🗑️ gelöscht", count=0, collection=collection)
|
||||
|
||||
|
||||
# ---------------------------
# OPTIONAL: simple self-test (can also be run as a standalone script)
# ---------------------------
# NOTE(review): the indentation inside this documentation string was reconstructed
# from a formatting-stripped view — verify against the original file.
TEST_DOC = """
Speicher als tests/test_exercise_search.py und mit pytest laufen lassen.

import os, requests
BASE = os.getenv("API_BASE", "http://localhost:8000")

# 1) Filter-only
r = requests.post(f"{BASE}/exercise/search", json={
    "discipline": "Karate",
    "max_duration": 12,
    "equipment_any": ["Bälle"],
    "capability_names": ["Reaktionsfähigkeit"],
    "capability_ge_level": 2,
    "limit": 5
})
r.raise_for_status()
js = r.json()
assert "hits" in js
for h in js["hits"]:
    p = h["payload"]
    assert p["discipline"] == "Karate"
    assert p["duration_minutes"] <= 12

# 2) Vector + Filter
r = requests.post(f"{BASE}/exercise/search", json={
    "query": "Aufwärmen 10min, Reaktionsfähigkeit, Teenager, Bälle",
    "discipline": "Karate",
    "limit": 3
})
r.raise_for_status()
js = r.json(); assert len(js["hits"]) <= 3
"""
|
||||
|
|
|
|||
|
|
@ -1,33 +1,161 @@
|
|||
from dotenv import load_dotenv
|
||||
load_dotenv() # Lädt Variablen aus .env in os.environ
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
llm_api.py – v1.2.0 (zentraler .env-Bootstrap, saubere Router-Einbindung, Swagger-Doku)
|
||||
|
||||
Änderungen ggü. v1.1.6:
|
||||
- Zentrales .env-Bootstrapping VOR allen Router-Imports (findet Datei robust; setzt LLMAPI_ENV_FILE/LLMAPI_ENV_BOOTSTRAPPED)
|
||||
- Konsistente Swagger-Beschreibung + Tags-Metadaten
|
||||
- Router ohne doppelte Prefixe einbinden (die Prefixe werden in den Routern definiert)
|
||||
- Root-/health und /version Endpoints
|
||||
- Defensive Includes (Router-Importfehler verhindern Server-Absturz; Logging statt Crash)
|
||||
- Beibehaltener globaler Fehlerhandler (generische 500)
|
||||
|
||||
Hinweis:
|
||||
- wiki_router im Canvas (v1.4.2) nutzt bereits robustes .env-Loading, respektiert aber die zentral gesetzten ENV-Variablen.
|
||||
- Wenn du ENV-Datei an anderem Ort hast, setze in der Systemd-Unit `Environment=LLMAPI_ENV_FILE=/pfad/.env`.
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from textwrap import dedent
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.responses import JSONResponse
|
||||
from clients import model, qdrant
|
||||
from wiki_router import router as wiki_router
|
||||
from embed_router import router as embed_router
|
||||
from exercise_router import router as exercise_router
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
# BUG FIX: removed a stale `__version__ = "1.1.6"` left over from the previous
# revision; the authoritative version is assigned once below (v1.2.0).
|
||||
# ----------------------
|
||||
# Zentraler .env-Bootstrap (VOR Router-Imports ausführen!)
|
||||
# ----------------------
|
||||
def _bootstrap_env() -> Optional[str]:
    """Load the first usable .env file from a list of candidate locations.

    Search order: $LLMAPI_ENV_FILE, find_dotenv() from the CWD, ./.env,
    <module dir>/.env, ~/.env, ~/.llm-api.env, /etc/llm-api.env.
    Existing process environment variables are never overridden
    (load_dotenv(..., override=False)).

    Side effects on success: sets LLMAPI_ENV_FILE to the loaded path and
    LLMAPI_ENV_BOOTSTRAPPED=1 so downstream routers can detect the bootstrap.

    Returns:
        The path of the loaded file, or None (dotenv not installed, no file
        found, or no file yielded any variables).
    """
    try:
        from dotenv import load_dotenv, find_dotenv
    except Exception:
        # python-dotenv is optional: fall back to the plain process environment.
        print("[env] python-dotenv nicht installiert – überspringe .env-Loading", flush=True)
        return None

    candidates: list[str] = []
    if os.getenv("LLMAPI_ENV_FILE"):
        candidates.append(os.getenv("LLMAPI_ENV_FILE") or "")
    fd = find_dotenv(".env", usecwd=True)
    if fd:
        candidates.append(fd)
    candidates += [
        str(Path.cwd() / ".env"),
        str(Path(__file__).parent / ".env"),
        str(Path.home() / ".env"),
        str(Path.home() / ".llm-api.env"),
        "/etc/llm-api.env",
    ]

    for p in candidates:
        try:
            if p and Path(p).exists():
                # A falsy load_dotenv() result means nothing was read — keep searching.
                if load_dotenv(p, override=False):
                    os.environ["LLMAPI_ENV_FILE"] = p
                    os.environ["LLMAPI_ENV_BOOTSTRAPPED"] = "1"
                    print(f"[env] loaded: {p}", flush=True)
                    return p
        except Exception as e:
            # One unreadable candidate must not abort the whole search.
            print(f"[env] load failed for {p}: {e}", flush=True)
    print("[env] no .env found; using process env", flush=True)
    return None
|
||||
|
||||
_ENV_SRC = _bootstrap_env()  # path of the loaded .env file, or None (exposed via /version)
|
||||
|
||||
# ----------------------
# App + OpenAPI metadata
# ----------------------
__version__ = "1.2.0"
print(f"[DEBUG] llm_api.py version {__version__} loaded from {__file__}", flush=True)

# OpenAPI tag metadata shown in Swagger UI; descriptions list the relevant ENV vars.
TAGS = [
    {
        "name": "wiki",
        "description": dedent(
            """
            MediaWiki-Proxy (Health, Login, Page-Info/Parse, SMW-Ask).
            **ENV**: `WIKI_API_URL`, `WIKI_TIMEOUT`, `WIKI_RETRIES`, `WIKI_SLEEP_MS`, `WIKI_BATCH`.
            """
        ),
    },
    {
        "name": "exercise",
        "description": dedent(
            """
            Übungen (Upsert, Suche, Delete). Upsert-Schlüssel: `external_id` (z. B. `mw:{pageid}`).
            **ENV**: `EXERCISE_COLLECTION`, `QDRANT_HOST`, `QDRANT_PORT`.
            """
        ),
    },
    {
        "name": "plans",
        "description": "Trainingspläne (Templates/Generate/Export).",
    },
]
|
||||
|
||||
|
||||
# FastAPI instance.
# BUG FIX: the call previously passed `description=` twice (the old short string
# plus the new dedent block), which is a SyntaxError in Python — only the
# documented multi-line description is kept.
app = FastAPI(
    title="KI Trainerassistent API",
    description=dedent(
        f"""
        Modulare API für Trainingsplanung und MediaWiki-Import.

        **Version:** {__version__}

        ## Quickstart (CLI)
        ```bash
        python3 wiki_importer.py --all
        python3 wiki_importer.py --all --category "Übungen" --dry-run
        ```
        """
    ),
    version=__version__,
    openapi_tags=TAGS,
    swagger_ui_parameters={"docExpansion": "list", "defaultModelsExpandDepth": 0},
)
|
||||
|
||||
# Optional: CORS for local UIs/tools.
# NOTE(review): wildcard origins/methods/headers are wide open — acceptable for
# local tooling, tighten before exposing publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
# ----------------------
# Global error handler (generic)
# ----------------------
@app.exception_handler(Exception)
async def unicorn_exception_handler(request, exc):
    """Map any unhandled exception to a generic 500 JSON response.

    Intentionally hides internals from clients; note that the exception is not
    logged here.
    """
    return JSONResponse(status_code=500, content={"detail": "Interner Serverfehler."})
|
||||
|
||||
# BUG FIX: removed the direct `app.include_router(...)` calls for wiki_router,
# embed_router and exercise_router. They registered each router a second time in
# addition to `_include_router_safely()` below, and mounted wiki_router with an
# extra `/import/wiki` prefix on top of the prefix already defined in its file.
|
||||
# ----------------------
# Router wiring (IMPORTANT: no extra prefixes here — each router defines its own)
# ----------------------

def _include_router_safely(name: str, import_path: str):
    """Import `import_path` lazily (after the .env bootstrap) and mount its `router`.

    Failures are logged instead of raised so one broken router cannot prevent
    the whole API from starting.
    """
    try:
        module = __import__(import_path, fromlist=["router"])  # lazy import after ENV bootstrap
        app.include_router(module.router)
        print(f"[router] {name} included", flush=True)
    except Exception as e:
        print(f"[router] {name} NOT included: {e}", flush=True)

_include_router_safely("wiki_router", "wiki_router")  # prefix defined inside the file: /import/wiki
_include_router_safely("embed_router", "embed_router")
_include_router_safely("exercise_router", "exercise_router")
_include_router_safely("plan_router", "plan_router")
_include_router_safely("plan_session_router", "plan_session_router")
|
||||
|
||||
# ----------------------
# Basic endpoints
# ----------------------
@app.get("/health", tags=["wiki"], summary="API-Health (lokal)")
def api_health():
    """Liveness probe: returns a static OK without touching any backend."""
    return {"status": "ok"}
|
||||
|
||||
@app.get("/version", tags=["wiki"], summary="API-Version & ENV-Quelle")
def api_version():
    """Report the running API version and the .env file path loaded at startup (or None)."""
    return {"version": __version__, "env_file": _ENV_SRC}
|
||||
|
|
|
|||
521
llm-api/plan_router.py
Normal file
521
llm-api/plan_router.py
Normal file
|
|
@ -0,0 +1,521 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
plan_router.py – v0.13.4 (WP-15)
|
||||
|
||||
Änderungen ggü. v0.13.3
|
||||
- Idempotenter POST /plan: Wenn ein Plan mit gleichem Fingerprint existiert und die neue
|
||||
Anfrage ein späteres `created_at` trägt, wird der gespeicherte Plan mit dem neueren
|
||||
`created_at` und `created_at_ts` aktualisiert (kein Duplikat, aber zeitlich „frisch“).
|
||||
- /plans: Mehrseitiges Scrollen bleibt aktiv; Zeitfenster-Filter robust (serverseitig + Fallback).
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Optional, Dict, Any
|
||||
from uuid import uuid4
|
||||
from datetime import datetime, timezone
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
|
||||
from clients import model, qdrant
|
||||
from qdrant_client.models import (
|
||||
PointStruct, Filter, FieldCondition, MatchValue,
|
||||
VectorParams, Distance, Range
|
||||
)
|
||||
|
||||
router = APIRouter(tags=["plans"])
|
||||
|
||||
# -----------------
# Configuration — collection names, all overridable via ENV.
# -----------------
# PLAN_COLLECTION wins over the legacy QDRANT_COLLECTION_PLANS (default "plans").
PLAN_COLLECTION = os.getenv("PLAN_COLLECTION") or os.getenv("QDRANT_COLLECTION_PLANS", "plans")
PLAN_TEMPLATE_COLLECTION = os.getenv("PLAN_TEMPLATE_COLLECTION", "plan_templates")
PLAN_SESSION_COLLECTION = os.getenv("PLAN_SESSION_COLLECTION", "plan_sessions")
EXERCISE_COLLECTION = os.getenv("EXERCISE_COLLECTION", "exercises")
|
||||
|
||||
# -----------------
# Models
# -----------------
class TemplateSection(BaseModel):
    """One structural section of a plan template."""
    name: str
    target_minutes: int
    # Keyword constraints applied when filling the section with exercises.
    must_keywords: List[str] = []
    ideal_keywords: List[str] = []
    supplement_keywords: List[str] = []
    forbid_keywords: List[str] = []
    # capability name -> target level (integer)
    capability_targets: Dict[str, int] = {}
||||
|
||||
class PlanTemplate(BaseModel):
    """Structural template from which concrete plans are built."""
    id: str = Field(default_factory=lambda: str(uuid4()))  # new UUID per template unless supplied
    name: str
    discipline: str
    age_group: str
    target_group: str
    total_minutes: int
    sections: List[TemplateSection] = []
    goals: List[str] = []
    equipment_allowed: List[str] = []
    created_by: str
    version: str
||||
|
||||
class PlanItem(BaseModel):
    """One exercise slot inside a plan section."""
    exercise_external_id: str  # upsert key of the referenced exercise (e.g. "mw:218")
    duration: int
    why: str  # rationale for selecting this exercise
|
||||
|
||||
class PlanSection(BaseModel):
    """A concrete section of a plan: a named list of exercise items plus its minutes."""
    name: str
    items: List[PlanItem] = []
    minutes: int
|
||||
|
||||
class Plan(BaseModel):
    """A concrete training plan, optionally derived from a PlanTemplate."""
    id: str = Field(default_factory=lambda: str(uuid4()))
    template_id: Optional[str] = None  # referenced PlanTemplate.id, validated on create
    title: str
    discipline: str
    age_group: str
    target_group: str
    total_minutes: int
    sections: List[PlanSection] = []
    goals: List[str] = []
    capability_summary: Dict[str, int] = {}
    novelty_against_last_n: Optional[float] = None
    # sha256 over title/minutes/items; used for idempotent POST /plan
    fingerprint: Optional[str] = None
    created_by: str
    # Timezone-aware UTC timestamp by default.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    source: str = "API"
||||
|
||||
class PlanTemplateList(BaseModel):
    """Paginated template listing; `count` is len(items), not a grand total."""
    items: List[PlanTemplate]
    limit: int
    offset: int
    count: int
|
||||
|
||||
class PlanList(BaseModel):
    """Paginated plan listing; `count` is len(items), not a grand total."""
    items: List[Plan]
    limit: int
    offset: int
    count: int
|
||||
|
||||
# -----------------
# Helpers
# -----------------
def _ensure_collection(name: str):
    """Create collection `name` (cosine distance, embedding-model-sized vectors) if absent."""
    if not qdrant.collection_exists(name):
        # NOTE(review): recreate_collection is deprecated in newer qdrant-client
        # releases; create_collection would suffice here since the collection is
        # known to be absent — confirm the client version before changing.
        qdrant.recreate_collection(
            collection_name=name,
            vectors_config=VectorParams(size=model.get_sentence_embedding_dimension(), distance=Distance.COSINE),
        )
|
||||
|
||||
def _norm_list(xs: List[str]) -> List[str]:
|
||||
seen, out = set(), []
|
||||
for x in xs or []:
|
||||
s = str(x).strip()
|
||||
k = s.casefold()
|
||||
if s and k not in seen:
|
||||
seen.add(k)
|
||||
out.append(s)
|
||||
return sorted(out, key=str.casefold)
|
||||
|
||||
def _template_embed_text(tpl: PlanTemplate) -> str:
    """Build the embedding text for a template: name, meta fields, goals, section names."""
    fragments = [tpl.name, tpl.discipline, tpl.age_group, tpl.target_group, *tpl.goals]
    fragments.extend(section.name for section in tpl.sections)
    return ". ".join(fragment for fragment in fragments if fragment)
|
||||
|
||||
def _plan_embed_text(p: Plan) -> str:
    """Build the embedding text for a concrete plan: title, meta fields, goals, section names."""
    fragments = [p.title, p.discipline, p.age_group, p.target_group, *p.goals]
    fragments.extend(section.name for section in p.sections)
    return ". ".join(fragment for fragment in fragments if fragment)
|
||||
|
||||
def _embed(text: str):
    """Encode `text` (empty string when falsy) with the shared sentence model; returns a plain list."""
    return model.encode(text or "").tolist()
|
||||
|
||||
def _fingerprint_for_plan(p: Plan) -> str:
    """sha256(title, total_minutes, sections.items.exercise_external_id, sections.items.duration)

    Items are flattened across sections in order, so the fingerprint is
    order-sensitive.
    """
    flat_items = []
    for section in p.sections:
        for item in (section.items or []):
            flat_items.append({
                "exercise_external_id": item.exercise_external_id,
                "duration": int(item.duration),
            })
    core = {
        "title": p.title,
        "total_minutes": int(p.total_minutes),
        "items": flat_items,
    }
    serialized = json.dumps(core, sort_keys=True, ensure_ascii=False)
    return hashlib.sha256(serialized.encode("utf-8")).hexdigest()
|
||||
|
||||
def _get_by_field(collection: str, key: str, value: Any):
    """Fetch the first point whose payload `key` equals `value`.

    Returns {"id": <point id>, "payload": <payload dict incl. "id">} or None.
    """
    condition = FieldCondition(key=key, match=MatchValue(value=value))
    points, _ = qdrant.scroll(
        collection_name=collection,
        scroll_filter=Filter(must=[condition]),
        limit=1,
        with_payload=True,
    )
    if not points:
        return None
    first = points[0]
    data = dict(first.payload or {})
    data.setdefault("id", str(first.id))
    return {"id": first.id, "payload": data}
|
||||
|
||||
def _as_model(model_cls, payload: Dict[str, Any]):
|
||||
fields = getattr(model_cls, "model_fields", None) or getattr(model_cls, "__fields__", {})
|
||||
allowed = set(fields.keys())
|
||||
data = {k: payload[k] for k in payload.keys() if k in allowed}
|
||||
return model_cls(**data)
|
||||
|
||||
def _truthy(val: Optional[str]) -> bool:
|
||||
return str(val or "").strip().lower() in {"1", "true", "yes", "on"}
|
||||
|
||||
def _exists_in_collection(collection: str, key: str, value: Any) -> bool:
    """Return True when at least one point has payload `key` == `value`."""
    probe = Filter(must=[FieldCondition(key=key, match=MatchValue(value=value))])
    points, _ = qdrant.scroll(collection_name=collection, scroll_filter=probe, limit=1, with_payload=False)
    return len(points) > 0
|
||||
|
||||
def _parse_iso_to_ts(iso_str: str) -> float:
|
||||
try:
|
||||
return float(datetime.fromisoformat(iso_str.replace("Z", "+00:00")).timestamp())
|
||||
except Exception:
|
||||
return float(datetime.now(timezone.utc).timestamp())
|
||||
|
||||
def _scroll_collect(collection: str, flt: Optional[Filter], need: int, page: int = 256):
    """Scroll `collection` until `need` points are gathered or the data is exhausted.

    The page size is clamped to [1, 1024] and shrunk near the end so no more
    than `need` points are fetched.
    """
    gathered = []
    cursor = None
    page = max(1, min(page, 1024))
    while len(gathered) < need:
        batch, cursor = qdrant.scroll(
            collection_name=collection,
            scroll_filter=flt,
            limit=min(page, need - len(gathered)),
            with_payload=True,
            offset=cursor,
        )
        if not batch:
            break
        gathered.extend(batch)
        if cursor is None:
            break
    return gathered
|
||||
|
||||
# -----------------
# Endpoints: Templates
# -----------------
@router.post(
    "/plan_templates",
    response_model=PlanTemplate,
    summary="Create a plan template",
    description=(
        "Erstellt ein Plan-Template (Strukturplanung).\n\n"
        "• Mehrere Sections erlaubt.\n"
        "• Section-Felder: must/ideal/supplement/forbid keywords + capability_targets.\n"
        "• Materialisierte Facettenfelder (section_*) werden intern geschrieben, um Qdrant-Filter zu beschleunigen."
    ),
)
def create_plan_template(t: PlanTemplate):
    """Store a template with normalized keyword lists plus materialized facet fields."""
    _ensure_collection(PLAN_TEMPLATE_COLLECTION)
    payload = t.model_dump()
    payload["goals"] = _norm_list(payload.get("goals"))
    sections = payload.get("sections", []) or []
    # Normalize every per-section keyword list (trim, dedupe, sort).
    for s in sections:
        s["must_keywords"] = _norm_list(s.get("must_keywords") or [])
        s["ideal_keywords"] = _norm_list(s.get("ideal_keywords") or [])
        s["supplement_keywords"] = _norm_list(s.get("supplement_keywords") or [])
        s["forbid_keywords"] = _norm_list(s.get("forbid_keywords") or [])

    # Materialized facets (flattened across sections) — written only to the
    # stored payload to speed up Qdrant KEYWORD filters; not part of the model.
    payload["section_names"] = _norm_list([s.get("name", "") for s in sections])
    payload["section_must_keywords"] = _norm_list([kw for s in sections for kw in (s.get("must_keywords") or [])])
    payload["section_ideal_keywords"] = _norm_list([kw for s in sections for kw in (s.get("ideal_keywords") or [])])
    payload["section_supplement_keywords"] = _norm_list([kw for s in sections for kw in (s.get("supplement_keywords") or [])])
    payload["section_forbid_keywords"] = _norm_list([kw for s in sections for kw in (s.get("forbid_keywords") or [])])

    vec = _embed(_template_embed_text(t))
    qdrant.upsert(collection_name=PLAN_TEMPLATE_COLLECTION, points=[PointStruct(id=str(t.id), vector=vec, payload=payload)])
    return t
|
||||
|
||||
@router.get(
    "/plan_templates/{tpl_id}",
    response_model=PlanTemplate,
    summary="Read a plan template by id",
    description="Liest ein Template anhand seiner ID und gibt nur die Schemafelder zurück (zusätzliche Payload wird herausgefiltert).",
)
def get_plan_template(tpl_id: str):
    """Return the template with the given id (schema fields only), or 404."""
    _ensure_collection(PLAN_TEMPLATE_COLLECTION)
    record = _get_by_field(PLAN_TEMPLATE_COLLECTION, "id", tpl_id)
    if record is None:
        raise HTTPException(status_code=404, detail="not found")
    return _as_model(PlanTemplate, record["payload"])
|
||||
|
||||
@router.get(
    "/plan_templates",
    response_model=PlanTemplateList,
    summary="List plan templates (filterable)",
    description=(
        "Listet Plan-Templates mit Filtern.\n\n"
        "**Filter** (exakte Matches, KEYWORD-Felder):\n"
        "- discipline, age_group, target_group\n"
        "- section: Section-Name (nutzt materialisierte `section_names`)\n"
        "- goal: Ziel (nutzt `goals`)\n"
        "- keyword: trifft auf beliebige Section-Keyword-Felder (must/ideal/supplement/forbid).\n\n"
        "**Pagination:** limit/offset. Feld `count` entspricht der Anzahl zurückgegebener Items (keine Gesamtsumme)."
    ),
)
def list_plan_templates(
    discipline: Optional[str] = Query(None, description="Filter: Disziplin (exaktes KEYWORD-Match)", example="Karate"),
    age_group: Optional[str] = Query(None, description="Filter: Altersgruppe", example="Teenager"),
    target_group: Optional[str] = Query(None, description="Filter: Zielgruppe", example="Breitensport"),
    section: Optional[str] = Query(None, description="Filter: Section-Name (materialisiert)", example="Warmup"),
    goal: Optional[str] = Query(None, description="Filter: Trainingsziel", example="Technik"),
    keyword: Optional[str] = Query(None, description="Filter: Keyword in must/ideal/supplement/forbid", example="Koordination"),
    limit: int = Query(20, ge=1, le=200, description="Max. Anzahl Items"),
    offset: int = Query(0, ge=0, description="Start-Offset für Paging"),
):
    """List templates matching the exact-match filters, paginated via limit/offset."""
    _ensure_collection(PLAN_TEMPLATE_COLLECTION)
    must: List[Any] = []
    should: List[Any] = []
    if discipline:
        must.append(FieldCondition(key="discipline", match=MatchValue(value=discipline)))
    if age_group:
        must.append(FieldCondition(key="age_group", match=MatchValue(value=age_group)))
    if target_group:
        must.append(FieldCondition(key="target_group", match=MatchValue(value=target_group)))
    if section:
        # Uses the materialized `section_names` facet written at template creation.
        must.append(FieldCondition(key="section_names", match=MatchValue(value=section)))
    if goal:
        must.append(FieldCondition(key="goals", match=MatchValue(value=goal)))
    if keyword:
        # `keyword` may appear in ANY of the four materialized keyword facets (OR semantics).
        for k in ("section_must_keywords","section_ideal_keywords","section_supplement_keywords","section_forbid_keywords"):
            should.append(FieldCondition(key=k, match=MatchValue(value=keyword)))

    flt = Filter(must=must or None, should=should or None) if (must or should) else None
    # Over-fetch offset+limit points, then slice locally for pagination.
    need = max(offset + limit, 1)
    pts = _scroll_collect(PLAN_TEMPLATE_COLLECTION, flt, need)
    items: List[PlanTemplate] = []
    for p in pts[offset:offset+limit]:
        payload = dict(p.payload or {})
        payload.setdefault("id", str(p.id))
        items.append(_as_model(PlanTemplate, payload))
    return PlanTemplateList(items=items, limit=limit, offset=offset, count=len(items))
|
||||
|
||||
# -----------------
|
||||
# Endpoints: Pläne
|
||||
# -----------------
|
||||
@router.post(
    "/plan",
    response_model=Plan,
    summary="Create a concrete training plan",
    description=(
        "Erstellt einen konkreten Trainingsplan.\n\n"
        "Idempotenz: gleicher Fingerprint (title + items) → gleicher Plan (kein Duplikat).\n"
        "Bei erneutem POST mit späterem `created_at` wird `created_at`/`created_at_ts` des bestehenden Plans aktualisiert."
    ),
)
def create_plan(p: Plan):
    """Create a training plan, idempotent on its fingerprint.

    Flow:
      1. Validate the optional template reference (422 on unknown id).
      2. Optional strict mode: validate every exercise reference (422 with
         the sorted set of missing ids).
      3. Normalize ``created_at`` to a UTC ISO string + float timestamp.
      4. If a plan with the same fingerprint exists, optionally bump its
         ``created_at``/``created_at_ts`` (only forward in time) and return
         the stored plan instead of creating a duplicate.
      5. Otherwise embed and upsert a new point.
    """
    _ensure_collection(PLAN_COLLECTION)

    # Check template reference (if set)
    if p.template_id:
        if not _exists_in_collection(PLAN_TEMPLATE_COLLECTION, "id", p.template_id):
            raise HTTPException(status_code=422, detail=f"Unknown template_id: {p.template_id}")

    # Optional strict mode: validate exercises against EXERCISE_COLLECTION
    if _truthy(os.getenv("PLAN_STRICT_EXERCISES")):
        missing: List[str] = []
        for sec in p.sections or []:
            for it in sec.items or []:
                exid = (it.exercise_external_id or "").strip()
                if exid and not _exists_in_collection(EXERCISE_COLLECTION, "external_id", exid):
                    missing.append(exid)
        if missing:
            raise HTTPException(status_code=422, detail={"error": "unknown exercise_external_id", "missing": sorted(set(missing))})

    # Fingerprint (a client-supplied fingerprint wins over the computed one)
    fp = _fingerprint_for_plan(p)
    p.fingerprint = p.fingerprint or fp

    # Compute target ISO string + timestamp from the request (even for duplicates).
    # Invalid or missing created_at falls back to "now" in UTC.
    req_payload = p.model_dump()
    dt = req_payload.get("created_at")
    if isinstance(dt, datetime):
        dt = dt.astimezone(timezone.utc).isoformat()
    elif isinstance(dt, str):
        try:
            # validate only; the original string is kept when parseable
            _ = datetime.fromisoformat(dt.replace("Z", "+00:00"))
        except Exception:
            dt = datetime.now(timezone.utc).isoformat()
    else:
        dt = datetime.now(timezone.utc).isoformat()
    req_payload["created_at"] = dt
    req_ts = _parse_iso_to_ts(dt)
    req_payload["created_at_ts"] = float(req_ts)

    # Duplicate check by fingerprint
    existing = _get_by_field(PLAN_COLLECTION, "fingerprint", p.fingerprint)
    if existing:
        # If the new created_at is later → update the stored plan's timestamps
        cur = existing["payload"]
        cur_ts = cur.get("created_at_ts")
        if cur_ts is None:
            # legacy points without created_at_ts: derive it from created_at
            cur_ts = _parse_iso_to_ts(str(cur.get("created_at", dt)))
        if req_ts > float(cur_ts):
            try:
                qdrant.set_payload(
                    collection_name=PLAN_COLLECTION,
                    payload={"created_at": req_payload["created_at"], "created_at_ts": req_payload["created_at_ts"]},
                    points=[existing["id"]],
                )
                # reflect the bump in the response object as well
                cur["created_at"] = req_payload["created_at"]
                cur["created_at_ts"] = req_payload["created_at_ts"]
            except Exception:
                # best-effort: keep serving the stored plan even if the bump failed
                pass
        return _as_model(Plan, cur)

    # Create a new plan
    p.goals = _norm_list(p.goals)
    payload = req_payload  # already contains the correct created_at + created_at_ts
    payload.update({
        "id": p.id,
        "template_id": p.template_id,
        "title": p.title,
        "discipline": p.discipline,
        "age_group": p.age_group,
        "target_group": p.target_group,
        "total_minutes": p.total_minutes,
        "sections": [s.model_dump() for s in p.sections],
        "goals": _norm_list(p.goals),
        "capability_summary": p.capability_summary,
        "novelty_against_last_n": p.novelty_against_last_n,
        "fingerprint": p.fingerprint,
        "created_by": p.created_by,
        "source": p.source,
    })
    # Materialize section names (used by the list_plans `section` filter)
    payload["plan_section_names"] = _norm_list([ (s.get("name") or "").strip() for s in (payload.get("sections") or []) if isinstance(s, dict) ])

    vec = _embed(_plan_embed_text(p))
    qdrant.upsert(collection_name=PLAN_COLLECTION, points=[PointStruct(id=str(p.id), vector=vec, payload=payload)])
    return p
|
||||
|
||||
@router.get(
    "/plan/{plan_id}",
    response_model=Plan,
    summary="Read a training plan by id",
    description="Liest einen Plan anhand seiner ID. `created_at` wird (falls ISO-String) zu `datetime` geparst.",
)
def get_plan(plan_id: str):
    """Fetch a single plan by its ``id`` payload field.

    Raises 404 when no matching point exists. A stored ISO-8601
    ``created_at`` string is converted back to ``datetime`` when parseable;
    otherwise the raw value is left untouched.
    """
    _ensure_collection(PLAN_COLLECTION)
    hit = _get_by_field(PLAN_COLLECTION, "id", plan_id)
    if not hit:
        raise HTTPException(status_code=404, detail="not found")
    data = hit["payload"]
    created = data.get("created_at")
    if isinstance(created, str):
        try:
            data["created_at"] = datetime.fromisoformat(created)
        except Exception:
            # keep the raw string when it is not valid ISO-8601
            pass
    return _as_model(Plan, data)
|
||||
|
||||
@router.get(
    "/plans",
    response_model=PlanList,
    summary="List training plans (filterable)",
    description=(
        "Listet Trainingspläne mit Filtern.\n\n"
        "**Filter** (exakte Matches, KEYWORD-Felder):\n"
        "- created_by, discipline, age_group, target_group, goal\n"
        "- section: Section-Name (nutzt materialisiertes `plan_section_names`)\n"
        "- created_from / created_to: ISO-8601 Zeitfenster → serverseitiger Range-Filter über `created_at_ts` (FLOAT). "
        "Falls 0 Treffer: zweiter Durchlauf ohne Zeit-Range + lokale Zeitprüfung.\n\n"
        "**Pagination:** limit/offset. Feld `count` entspricht der Anzahl zurückgegebener Items (keine Gesamtsumme)."
    ),
)
def list_plans(
    created_by: Optional[str] = Query(None, description="Filter: Ersteller", example="tester"),
    discipline: Optional[str] = Query(None, description="Filter: Disziplin", example="Karate"),
    age_group: Optional[str] = Query(None, description="Filter: Altersgruppe", example="Teenager"),
    target_group: Optional[str] = Query(None, description="Filter: Zielgruppe", example="Breitensport"),
    goal: Optional[str] = Query(None, description="Filter: Trainingsziel", example="Technik"),
    section: Optional[str] = Query(None, description="Filter: Section-Name", example="Warmup"),
    created_from: Optional[str] = Query(None, description="Ab-Zeitpunkt (ISO 8601, z. B. 2025-08-12T00:00:00Z)", example="2025-08-12T00:00:00Z"),
    created_to: Optional[str] = Query(None, description="Bis-Zeitpunkt (ISO 8601)", example="2025-08-13T00:00:00Z"),
    limit: int = Query(20, ge=1, le=200, description="Max. Anzahl Items"),
    offset: int = Query(0, ge=0, description="Start-Offset für Paging"),
):
    """List plans with exact-match filters, an optional time window and paging.

    The time window is first applied server-side via ``created_at_ts``; if
    that yields zero hits (e.g. legacy points without the ts field), a second
    scroll without the range runs and the window is re-checked locally per
    payload. ``count`` is the number of returned items, not a total.
    """
    _ensure_collection(PLAN_COLLECTION)

    # Base filters (without the time window)
    base_must: List[Any] = []
    if created_by:
        base_must.append(FieldCondition(key="created_by", match=MatchValue(value=created_by)))
    if discipline:
        base_must.append(FieldCondition(key="discipline", match=MatchValue(value=discipline)))
    if age_group:
        base_must.append(FieldCondition(key="age_group", match=MatchValue(value=age_group)))
    if target_group:
        base_must.append(FieldCondition(key="target_group", match=MatchValue(value=target_group)))
    if goal:
        base_must.append(FieldCondition(key="goals", match=MatchValue(value=goal)))
    if section:
        base_must.append(FieldCondition(key="plan_section_names", match=MatchValue(value=section)))

    # Server-side time range; silently dropped when a bound is not valid ISO-8601
    range_args: Dict[str, float] = {}
    try:
        if created_from:
            range_args["gte"] = float(datetime.fromisoformat(created_from.replace("Z", "+00:00")).timestamp())
        if created_to:
            range_args["lte"] = float(datetime.fromisoformat(created_to.replace("Z", "+00:00")).timestamp())
    except Exception:
        range_args = {}

    applied_server_range = bool(range_args)
    must_with_time = list(base_must)
    if applied_server_range:
        must_with_time.append(FieldCondition(key="created_at_ts", range=Range(**range_args)))

    # Collect enough points to cover offset + limit
    need = max(offset + limit, 1)

    # 1) Scroll with the time range (if any)
    pts = _scroll_collect(PLAN_COLLECTION, Filter(must=must_with_time or None) if must_with_time else None, need)

    # 2) Fallback: 0 hits → scroll without the time range and filter locally
    fallback_local_time_check = False
    if applied_server_range and not pts:
        pts = _scroll_collect(PLAN_COLLECTION, Filter(must=base_must or None) if base_must else None, need)
        fallback_local_time_check = True

    def _in_window(py: Dict[str, Any]) -> bool:
        # Local re-check of the created_from/created_to window for one payload.
        if not (created_from or created_to):
            return True
        if applied_server_range and not fallback_local_time_check:
            return True  # already filtered server-side
        ts = py.get("created_at")
        # tolerate Mongo-style {"$date": ...} wrappers
        if isinstance(ts, dict) and ts.get("$date"):
            ts = ts["$date"]
        if isinstance(py.get("created_at_ts"), (int, float)):
            dt = datetime.fromtimestamp(float(py["created_at_ts"]), tz=timezone.utc)
        elif isinstance(ts, str):
            try:
                dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
            except Exception:
                return False
        elif isinstance(ts, datetime):
            dt = ts
        else:
            # no usable timestamp → excluded when a window was requested
            return False
        ok = True
        if created_from:
            try:
                ok = ok and dt >= datetime.fromisoformat(created_from.replace("Z", "+00:00"))
            except Exception:
                pass
        if created_to:
            try:
                ok = ok and dt <= datetime.fromisoformat(created_to.replace("Z", "+00:00"))
            except Exception:
                pass
        return ok

    payloads: List[Dict[str, Any]] = []
    for p in pts:
        py = dict(p.payload or {})
        py.setdefault("id", str(p.id))
        if _in_window(py):
            payloads.append(py)

    sliced = payloads[offset:offset+limit]
    items = [_as_model(Plan, x) for x in sliced]
    return PlanList(items=items, limit=limit, offset=offset, count=len(items))
|
||||
117
llm-api/plan_session_router.py
Normal file
117
llm-api/plan_session_router.py
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
plan_session_router.py – v0.2.0 (WP-15)
|
||||
|
||||
CRUD-Minimum für Plan-Sessions (POST/GET) mit Referenz-Validierung.
|
||||
Kompatibel zum Qdrant-Client-Stil der bestehenden Router.
|
||||
"""
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel, Field, conint
|
||||
from typing import List, Optional, Dict, Any
|
||||
from uuid import uuid4
|
||||
from datetime import datetime, timezone
|
||||
import os
|
||||
|
||||
from clients import model, qdrant
|
||||
from qdrant_client.models import PointStruct, Filter, FieldCondition, MatchValue, VectorParams, Distance
|
||||
|
||||
router = APIRouter(tags=["plan_sessions"])
|
||||
|
||||
# -----------------
|
||||
# Konfiguration
|
||||
# -----------------
|
||||
PLAN_SESSION_COLLECTION = os.getenv("PLAN_SESSION_COLLECTION", "plan_sessions")
|
||||
PLAN_COLLECTION = os.getenv("PLAN_COLLECTION", "plans")
|
||||
|
||||
# -----------------
|
||||
# Modelle
|
||||
# -----------------
|
||||
class Feedback(BaseModel):
    """Trainer feedback attached to an executed plan session."""
    # 1 (worst) .. 5 (best)
    rating: conint(ge=1, le=5)
    # free-text notes; also feeds the session embedding text
    notes: str
|
||||
|
||||
class PlanSession(BaseModel):
    """One executed training session referencing a stored plan."""
    # auto-generated UUID when the client does not supply one
    id: str = Field(default_factory=lambda: str(uuid4()))
    # must reference an existing plan; validated in create_plan_session (422 otherwise)
    plan_id: str
    # stored as UTC ISO-8601 string in the payload
    executed_at: datetime
    location: str
    coach: str
    group_label: str
    feedback: Feedback
    # normalized (trimmed / deduplicated / sorted) on create
    used_equipment: List[str] = []
|
||||
|
||||
# -----------------
|
||||
# Helpers
|
||||
# -----------------
|
||||
|
||||
def _ensure_collection(name: str):
    """Lazily create the collection if it does not exist yet.

    Vector size follows the shared sentence-embedding model; cosine distance.
    NOTE(review): ``recreate_collection`` is deprecated in newer
    qdrant-client versions — harmless here since it only runs when the
    collection is missing, but consider ``create_collection``.
    """
    if not qdrant.collection_exists(name):
        qdrant.recreate_collection(
            collection_name=name,
            vectors_config=VectorParams(size=model.get_sentence_embedding_dimension(), distance=Distance.COSINE),
        )
|
||||
|
||||
|
||||
def _norm_list(xs: List[str]) -> List[str]:
|
||||
seen, out = set(), []
|
||||
for x in xs or []:
|
||||
s = str(x).strip()
|
||||
k = s.casefold()
|
||||
if s and k not in seen:
|
||||
seen.add(k)
|
||||
out.append(s)
|
||||
return sorted(out, key=str.casefold)
|
||||
|
||||
|
||||
def _session_embed_text(s: PlanSession) -> str:
|
||||
parts = [s.plan_id, s.location, s.coach, s.group_label, s.feedback.notes]
|
||||
return ". ".join([p for p in parts if p])
|
||||
|
||||
|
||||
def _embed(text: str):
    """Encode ``text`` with the shared sentence model and return a plain list."""
    safe_text = text if text else ""
    return model.encode(safe_text).tolist()
|
||||
|
||||
|
||||
def _get_by_field(collection: str, key: str, value: Any) -> Optional[Dict[str, Any]]:
    """Return the payload of the first point whose ``key`` equals ``value``.

    The point id is injected under ``"id"`` when the payload lacks it.
    Returns None when nothing matches.
    """
    condition = FieldCondition(key=key, match=MatchValue(value=value))
    hits, _ = qdrant.scroll(
        collection_name=collection,
        scroll_filter=Filter(must=[condition]),
        limit=1,
        with_payload=True,
    )
    if not hits:
        return None
    first = hits[0]
    result = dict(first.payload or {})
    result.setdefault("id", str(first.id))
    return result
|
||||
|
||||
# -----------------
|
||||
# Endpoints
|
||||
# -----------------
|
||||
@router.post("/plan_sessions", response_model=PlanSession)
def create_plan_session(s: PlanSession):
    """Persist a plan session; unknown ``plan_id`` is rejected with 422."""
    _ensure_collection(PLAN_SESSION_COLLECTION)
    # Validate the plan reference
    if not _get_by_field(PLAN_COLLECTION, "id", s.plan_id):
        raise HTTPException(status_code=422, detail=f"Unknown plan_id: {s.plan_id}")

    # Normalize equipment list (trim / dedupe / sort)
    s.used_equipment = _norm_list(s.used_equipment)
    payload = s.model_dump()
    # Ensure executed_at is stored as a UTC ISO-8601 string
    if isinstance(payload.get("executed_at"), datetime):
        payload["executed_at"] = payload["executed_at"].astimezone(timezone.utc).isoformat()

    vec = _embed(_session_embed_text(s))
    qdrant.upsert(collection_name=PLAN_SESSION_COLLECTION, points=[PointStruct(id=str(s.id), vector=vec, payload=payload)])
    return s
|
||||
|
||||
|
||||
@router.get("/plan_sessions/{session_id}", response_model=PlanSession)
def get_plan_session(session_id: str):
    """Read a plan session by id.

    Raises 404 when no matching point exists; a stored ISO-8601
    ``executed_at`` string is parsed back to ``datetime`` when possible.
    """
    _ensure_collection(PLAN_SESSION_COLLECTION)
    record = _get_by_field(PLAN_SESSION_COLLECTION, "id", session_id)
    if not record:
        raise HTTPException(status_code=404, detail="not found")
    executed = record.get("executed_at")
    if isinstance(executed, str):
        try:
            record["executed_at"] = datetime.fromisoformat(executed)
        except Exception:
            # keep the raw string; model validation may still coerce it
            pass
    return PlanSession(**record)
|
||||
|
|
@ -1,173 +1,534 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
File: wiki_router.py
|
||||
Beschreibung:
|
||||
- Endpunkte für MediaWiki-Integration im lokalen Netzwerk.
|
||||
- Funktionen:
|
||||
* /health: Prüft Verfügbarkeit der MediaWiki-API.
|
||||
* /login: Führt clientlogin durch und speichert Session-Cookies.
|
||||
* /semantic/pages: Listet alle Übungen inkl. Unterkategorien via SMW-Ask.
|
||||
* /parsepage: Ruft Roh-Wikitext über action=parse für eine Seite ab.
|
||||
* /info: Liefert pageid und fullurl über Core-API Query.
|
||||
* /semantic/page: Liefert Metadaten einer Übung und Wikitext sowie pageid über Core-API.
|
||||
Version: 1.2.0
|
||||
wiki_router.py – v1.4.3 (Swagger + robustes .env + optionaler ENV-Login)
|
||||
|
||||
Änderungen ggü. v1.4.2:
|
||||
- **/login/env** hinzugefügt: Login mit WIKI_BOT_USER/WIKI_BOT_PASSWORD aus ENV (Secrets werden nie ausgegeben)
|
||||
- .env-Bootstrap robuster und **vor** dem ersten Aufruf geloggt
|
||||
- /.meta/env/runtime um Credentials-Flags ergänzt (ohne Klartext)
|
||||
- response_description-Strings mit JSON-Beispielen sauber gequotet
|
||||
- Keine Breaking-Changes (Signaturen & Pfade unverändert)
|
||||
|
||||
Prefix-Hinweis:
|
||||
- Der Router setzt `prefix="/import/wiki"`. In `llm_api.py` **ohne** weiteren Prefix einbinden.
|
||||
"""
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
from typing import Dict, Any, Optional, List
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
from typing import Dict, Any, List
|
||||
import requests, os
|
||||
from pydantic import BaseModel, Field
|
||||
from textwrap import dedent
|
||||
import os, time, logging
|
||||
import requests
|
||||
from dotenv import load_dotenv, find_dotenv
|
||||
from starlette.responses import PlainTextResponse
|
||||
|
||||
__version__ = "1.2.0"
|
||||
router = APIRouter()
|
||||
# -------------------------------------------------
|
||||
# Logging **vor** .env-Bootstrap initialisieren
|
||||
# -------------------------------------------------
|
||||
logger = logging.getLogger("wiki_router")
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
# -------------------------------------------------
|
||||
# Robustes .env-Loading (findet Datei auch außerhalb des CWD)
|
||||
# -------------------------------------------------
|
||||
|
||||
def _bootstrap_env() -> Optional[str]:
    """Try several typical .env locations, load the first hit, and log it.

    Search order:
      1) env `LLMAPI_ENV_FILE`
      2) find_dotenv() relative to the CWD
      3) CWD/.env
      4) directory of this file /.env
      5) $HOME/.env
      6) $HOME/.llm-api.env
      7) /etc/llm-api.env

    Returns:
        The path that was loaded, or None when no .env file was found
        (the plain process environment is used in that case).
    """
    candidates: List[str] = []
    if os.getenv("LLMAPI_ENV_FILE"):
        candidates.append(os.getenv("LLMAPI_ENV_FILE") or "")
    fd = find_dotenv(".env", usecwd=True)
    if fd:
        candidates.append(fd)
    candidates += [
        os.path.join(os.getcwd(), ".env"),
        os.path.join(os.path.dirname(__file__), ".env"),
        os.path.expanduser("~/.env"),
        os.path.expanduser("~/.llm-api.env"),
        "/etc/llm-api.env",
    ]
    for path in candidates:
        try:
            if path and os.path.exists(path):
                # override=False: already-set process env vars keep precedence
                loaded = load_dotenv(path, override=False)
                if loaded:
                    logger.info("wiki_router: .env geladen aus %s", path)
                    return path
        except Exception as e:
            logger.warning("wiki_router: .env laden fehlgeschlagen (%s): %s", path, e)
    logger.info("wiki_router: keine .env gefunden – verwende Prozess-Umgebung")
    return None
|
||||
|
||||
_BOOTSTRAP_ENV = _bootstrap_env()
|
||||
|
||||
# -------------------------------------------------
|
||||
# Router & Konfiguration
|
||||
# -------------------------------------------------
|
||||
router = APIRouter(prefix="/import/wiki", tags=["wiki"])
|
||||
|
||||
# Hinweis: Werte werden NACH dem .env-Bootstrap aus os.environ gelesen.
|
||||
# Änderungen an .env erfordern i. d. R. einen Neustart des Dienstes.
|
||||
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
|
||||
wiki_session = requests.Session()
|
||||
WIKI_TIMEOUT = float(os.getenv("WIKI_TIMEOUT", "15"))
|
||||
WIKI_BATCH = int(os.getenv("WIKI_BATCH", "50"))
|
||||
WIKI_RETRIES = int(os.getenv("WIKI_RETRIES", "1")) # zusätzliche Versuche bei Upstream-Fehlern
|
||||
WIKI_SLEEPMS = int(os.getenv("WIKI_SLEEP_MS", "0")) # Throttle zwischen Requests (Millisekunden)
|
||||
|
||||
# Single Session (Cookies für Login)
|
||||
wiki_session = requests.Session()
|
||||
wiki_session.headers.update({"User-Agent": "local-llm-wiki-proxy/1.4.3"})
|
||||
|
||||
# -------------------------------------------------
|
||||
# Schemas
|
||||
# -------------------------------------------------
|
||||
class WikiLoginRequest(BaseModel):
|
||||
username: str
|
||||
password: str
|
||||
username: str = Field(..., description="MediaWiki-Benutzername (kein .env-Wert)")
|
||||
password: str = Field(..., description="MediaWiki-Passwort (kein .env-Wert)")
|
||||
model_config = {"json_schema_extra": {"example": {"username": "Bot", "password": "••••••"}}}
|
||||
|
||||
class WikiLoginResponse(BaseModel):
|
||||
status: str
|
||||
message: str | None = None
|
||||
|
||||
class PageContentResponse(BaseModel):
|
||||
pageid: int
|
||||
title: str
|
||||
wikitext: str
|
||||
status: str = Field(..., description="'success' bei erfolgreichem Login")
|
||||
message: Optional[str] = Field(None, description="Optionale Zusatzinfos")
|
||||
|
||||
class PageInfoResponse(BaseModel):
|
||||
pageid: int
|
||||
title: str
|
||||
fullurl: str
|
||||
pageid: int = Field(..., description="Eindeutige PageID der MediaWiki-Seite")
|
||||
title: str = Field(..., description="Aufgelöster Titel (kann von Eingabe abweichen, z. B. Redirect/Normalize)")
|
||||
fullurl: str = Field(..., description="Kanonsiche URL zur Seite")
|
||||
model_config = {"json_schema_extra": {"example": {"pageid": 218, "title": "Affenklatschen", "fullurl": "https://…/index.php?title=Affenklatschen"}}}
|
||||
|
||||
# Health-Check
|
||||
@router.get("/health")
|
||||
def health_check():
|
||||
try:
|
||||
resp = wiki_session.get(
|
||||
WIKI_API_URL,
|
||||
params={"action": "query", "meta": "siteinfo", "siprop": "general", "format": "json"},
|
||||
timeout=5
|
||||
)
|
||||
resp.raise_for_status()
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {e}")
|
||||
class PageContentResponse(BaseModel):
|
||||
pageid: int = Field(..., description="PageID der angefragten Seite")
|
||||
title: str = Field(..., description="Echo des mitgegebenen Titels (optional)")
|
||||
wikitext: str = Field(..., description="Roh-Wikitext (inkl. Templates), keine Sanitization")
|
||||
model_config = {"json_schema_extra": {"example": {"pageid": 218, "title": "Affenklatschen", "wikitext": "{{ÜbungInfoBox|…}}"}}}
|
||||
|
||||
# -------------------------------------------------
|
||||
# Utils
|
||||
# -------------------------------------------------
|
||||
|
||||
def _sleep():
    # Optional throttle between upstream requests (WIKI_SLEEP_MS, milliseconds)
    if WIKI_SLEEPMS > 0:
        time.sleep(WIKI_SLEEPMS / 1000.0)
|
||||
|
||||
|
||||
def _request_with_retry(method: str, params: Dict[str, Any], *, data: Dict[str, Any] | None = None) -> requests.Response:
    """Wrapper around requests.* with retry/throttle and consistent 502 errors on upstream problems.

    Uses .env: WIKI_RETRIES, WIKI_SLEEP_MS, WIKI_TIMEOUT, WIKI_API_URL

    Args:
        method: "GET" issues a GET with ``params``; any other value issues a
            POST with ``data`` (or ``params`` as form body when ``data`` is None).
        params: Query parameters (GET) or fallback form body (POST).
        data: Optional explicit form body for POST requests.

    Returns:
        The successful response (after raise_for_status).

    Raises:
        HTTPException: 502 once all WIKI_RETRIES + 1 attempts failed.
    """
    last_exc: Optional[Exception] = None
    for attempt in range(WIKI_RETRIES + 1):
        try:
            if method == "GET":
                resp = wiki_session.get(WIKI_API_URL, params=params, timeout=WIKI_TIMEOUT)
            else:
                resp = wiki_session.post(WIKI_API_URL, data=data or params, timeout=WIKI_TIMEOUT)
            resp.raise_for_status()
            return resp
        except Exception as e:
            last_exc = e
            logger.warning("Upstream error on %s (try %d/%d): %s", method, attempt + 1, WIKI_RETRIES + 1, e)
            # throttle between attempts (also runs after the final failure)
            _sleep()
    # all attempts exhausted
    raise HTTPException(status_code=502, detail=f"Upstream error: {last_exc}")
|
||||
|
||||
|
||||
def _normalize_variants(title: str) -> List[str]:
|
||||
"""Erzeuge robuste Titel-Varianten: Leerzeichen/Unterstrich, Bindestrich/Dash-Varianten."""
|
||||
t = (title or "").strip()
|
||||
variants = {t}
|
||||
if " " in t:
|
||||
variants.add(t.replace(" ", "_"))
|
||||
# Bindestrich / Gedankenstrich Varianten
|
||||
for a, b in [("-", "–"), ("-", "—"), ("–", "-"), ("—", "-")]:
|
||||
if a in t:
|
||||
variants.add(t.replace(a, b))
|
||||
return list(variants)
|
||||
|
||||
|
||||
def _fetch_pageinfo_batch(titles: List[str]) -> Dict[str, Dict[str, Any]]:
    """Batch resolver for page info (pageid, fullurl); honors redirects & title normalization.

    Large title lists are processed in chunks of WIKI_BATCH; throttling via
    WIKI_SLEEP_MS between chunks.

    Returns:
        Mapping of resolved title — plus the original source titles of any
        redirects — to ``{"pageid": int, "fullurl": str}``. Missing pages
        are skipped.
    """
    if not titles:
        return {}
    out: Dict[str, Dict[str, Any]] = {}
    for i in range(0, len(titles), max(1, WIKI_BATCH)):
        chunk = titles[i:i + max(1, WIKI_BATCH)]
        params = {
            "action": "query",
            "format": "json",
            "prop": "info",
            "inprop": "url",
            "redirects": 1,
            "converttitles": 1,
            "titles": "|".join(chunk),
        }
        resp = _request_with_retry("GET", params)
        data = resp.json() or {}
        q = data.get("query", {})
        redirects = {d.get("from"): d.get("to") for d in (q.get("redirects") or [])}
        pages = q.get("pages", {}) or {}
        for pid_str, page in pages.items():
            # "-1" key / "missing" marker denotes a nonexistent page
            if page.get("missing") is not None or str(pid_str) == "-1":
                continue
            try:
                pid = int(pid_str)
            except ValueError:
                pid = int(page.get("pageid", -1))
            title_out = page.get("title")
            fullurl = page.get("fullurl") or page.get("canonicalurl") or ""
            if not title_out:
                continue
            out[title_out] = {"pageid": pid, "fullurl": fullurl}
            # also resolve the original titles of redirects
            for frm, to in redirects.items():
                if to == title_out and frm not in out:
                    out[frm] = {"pageid": pid, "fullurl": fullurl}
        _sleep()
    return out
|
||||
|
||||
# -------------------------------------------------
|
||||
# Doku-Konstanten (Markdown/.env)
|
||||
# -------------------------------------------------
|
||||
MANUAL_WIKI_IMPORTER = dedent("""
|
||||
# wiki_importer.py – Kurzanleitung
|
||||
|
||||
## Voraussetzungen
|
||||
- API erreichbar: `GET /import/wiki/health` (Status `ok`)
|
||||
- .env:
|
||||
- `API_BASE_URL=http://localhost:8000`
|
||||
- `WIKI_BOT_USER`, `WIKI_BOT_PASSWORD`
|
||||
- optional: `EXERCISE_COLLECTION=exercises`
|
||||
|
||||
## Smoke-Test (3 Läufe)
|
||||
```bash
|
||||
python3 wiki_importer.py --title "Affenklatschen" --category "Übungen" --smoke-test
|
||||
```
|
||||
|
||||
## Vollimport
|
||||
```bash
|
||||
python3 wiki_importer.py --all
|
||||
# optional:
|
||||
python3 wiki_importer.py --all --category "Übungen"
|
||||
python3 wiki_importer.py --all --dry-run
|
||||
```
|
||||
|
||||
## Idempotenz-Logik
|
||||
- external_id = `mw:{pageid}`
|
||||
- Fingerprint (sha256) über: `title, summary, execution, notes, duration_minutes, capabilities, keywords`
|
||||
- Entscheid:
|
||||
- not found → create
|
||||
- fingerprint gleich → skip
|
||||
- fingerprint ungleich → update (+ `imported_at`)
|
||||
|
||||
## Mapping (Wiki → Exercise)
|
||||
- Schlüsselworte → `keywords` (`,`-getrennt, getrimmt, dedupliziert)
|
||||
- Hilfsmittel → `equipment`
|
||||
- Disziplin → `discipline`
|
||||
- Durchführung/Notizen/Vorbereitung/Methodik → `execution`, `notes`, `preparation`, `method`
|
||||
- Capabilities → `capabilities` (Level 1..5) + Facetten (`capability_ge1..5`, `capability_eq1..5`, `capability_keys`)
|
||||
- Metadaten → `external_id`, `source="mediawiki"`, `imported_at`
|
||||
|
||||
## Troubleshooting
|
||||
- 404 bei `/import/wiki/info?...`: prüfe Prefix (kein Doppelprefix), Titelvarianten
|
||||
- 401 Login: echte User-Creds verwenden
|
||||
- 502 Upstream: `WIKI_API_URL`/TLS prüfen; Timeouts/Retry/Throttle (`WIKI_TIMEOUT`, `WIKI_RETRIES`, `WIKI_SLEEP_MS`)
|
||||
""")
|
||||
|
||||
ENV_DOC = [
|
||||
{"name": "WIKI_API_URL", "desc": "Basis-URL zur MediaWiki-API (z. B. http://…/w/api.php)"},
|
||||
{"name": "WIKI_TIMEOUT", "desc": "Timeout in Sekunden (Default 15)"},
|
||||
{"name": "WIKI_RETRIES", "desc": "Anzahl zusätzlicher Versuche (Default 1)"},
|
||||
{"name": "WIKI_SLEEP_MS", "desc": "Throttle zwischen Requests in Millisekunden (Default 0)"},
|
||||
{"name": "WIKI_BATCH", "desc": "Batchgröße für Titel-Enrichment (Default 50)"},
|
||||
{"name": "WIKI_BOT_USER", "desc": "(optional) Benutzername für /login/env – **Wert wird nie im Klartext zurückgegeben**"},
|
||||
{"name": "WIKI_BOT_PASSWORD", "desc": "(optional) Passwort für /login/env – **Wert wird nie im Klartext zurückgegeben**"},
|
||||
]
|
||||
|
||||
# -------------------------------------------------
|
||||
# Doku-/Meta-Endpunkte
|
||||
# -------------------------------------------------
|
||||
@router.get(
    "/manual/wiki_importer",
    summary="Handbuch: wiki_importer.py (Markdown)",
    description="Kompaktes Handbuch mit .env-Hinweisen, Aufrufen, Idempotenz und Troubleshooting.",
    response_class=PlainTextResponse,
    response_description="Markdown-Text.",
    openapi_extra={
        "x-codeSamples": [
            {"lang": "bash", "label": "Vollimport (Standard)", "source": "python3 wiki_importer.py --all"},
            {"lang": "bash", "label": "Dry-Run + Kategorie", "source": "python3 wiki_importer.py --all --category \"Übungen\" --dry-run"},
        ]
    },
)
def manual_wiki_importer():
    """Serve the static Markdown manual for wiki_importer.py as plain text."""
    return MANUAL_WIKI_IMPORTER
|
||||
|
||||
|
||||
@router.get(
    "/meta/env",
    summary=".env Referenz (Wiki-bezogen)",
    description="Listet die relevanten Umgebungsvariablen für die Wiki-Integration auf (ohne Werte).",
    response_description="Array aus {name, desc}.",
)
def meta_env() -> List[Dict[str, str]]:
    """Return the documented wiki-related env var names/descriptions (never values)."""
    return ENV_DOC
|
||||
|
||||
|
||||
@router.get(
    "/meta/env/runtime",
    summary=".env Runtime (wirksame Werte)",
    description="Zeigt die aktuell wirksamen Konfigurationswerte für den Wiki-Router (ohne Secrets) und die geladene .env-Quelle.",
    response_description="Objekt mit 'loaded_from' und 'env' (Key→Value).",
)
def meta_env_runtime() -> Dict[str, Any]:
    """Report the effective wiki configuration and the .env source.

    Credential values are never returned — only boolean set/unset flags.
    """
    config_keys = ["WIKI_API_URL", "WIKI_TIMEOUT", "WIKI_RETRIES", "WIKI_SLEEP_MS", "WIKI_BATCH"]
    user_present = bool(os.getenv("WIKI_BOT_USER"))
    password_present = bool(os.getenv("WIKI_BOT_PASSWORD"))
    return {
        "loaded_from": _BOOTSTRAP_ENV,
        "env": {key: os.getenv(key) for key in config_keys},
        "credentials": {
            "WIKI_BOT_USER_set": user_present,
            "WIKI_BOT_PASSWORD_set": password_present,
            "ready_for_login_env": user_present and password_present,
        },
    }
|
||||
|
||||
# -------------------------------------------------
|
||||
# API-Endpunkte
|
||||
# -------------------------------------------------
|
||||
@router.get(
    "/health",
    summary="Ping & Site-Info des MediaWiki-Upstreams",
    description=dedent(
        """
        Führt einen leichten `meta=siteinfo`-Request gegen den konfigurierten MediaWiki-Upstream aus.

        **Besonderheiten**
        - Nutzt eine persistente `requests.Session` (Cookies werden für spätere Aufrufe wiederverwendet).
        - Respektiert `.env`: `WIKI_API_URL`, `WIKI_TIMEOUT`, `WIKI_RETRIES`, `WIKI_SLEEP_MS`.
        - Bei Upstream-Problemen wird **HTTP 502** geworfen (statt 500).

        **Hinweis**: Je nach Wiki-Konfiguration sind detaillierte Infos (Generator/Sitename) nur **nach Login** sichtbar.
        """
    ),
    response_description='`{"status":"ok"}` oder mit `wiki.sitename/generator` bei `verbose=1`.',
    openapi_extra={
        "x-codeSamples": [
            {"lang": "bash", "label": "curl", "source": "curl -s 'http://localhost:8000/import/wiki/health?verbose=1' | jq ."}
        ]
    },
)
def health(verbose: Optional[int] = Query(default=0, description="1 = Site-Metadaten (sitename/generator) mitsenden")) -> Dict[str, Any]:
    """Lightweight upstream health probe via `meta=siteinfo` (502 on upstream failure)."""
    resp = _request_with_retry("GET", {"action": "query", "meta": "siteinfo", "format": "json"})
    if verbose:
        # include site metadata when requested
        info = resp.json().get("query", {}).get("general", {})
        return {"status": "ok", "wiki": {"sitename": info.get("sitename"), "generator": info.get("generator")}}
    return {"status": "ok"}
|
||||
|
||||
# Login Endpoint
@router.post("/login", response_model=WikiLoginResponse)

# NOTE(review): the bare decorator above plus the documented one below register
# this handler twice for "/login" — looks like a merge leftover from the
# v1.2 → v1.4.3 diff; confirm the bare one can be dropped.
@router.post(
    "/login",
    response_model=WikiLoginResponse,
    summary="MediaWiki-Login (Session-Cookies werden serverseitig gespeichert)",
    description=dedent(
        """
        Meldet den Proxy am MediaWiki an. Unterstützt `clientlogin` (mit `loginreturnurl`) und
        **fällt zurück** auf `action=login`, falls erforderlich. Erfolgreiche Logins hinterlegen die
        Session-Cookies in der Server-Session und gelten für nachfolgende Requests.

        **Besonderheiten**
        - Erwartet **Benutzername/Passwort im Body** (keine .env-Creds).
        - Verwendet vor dem Login ein Logintoken (`meta=tokens`).
        - Rückgabe `{\"status\":\"success\"}` bei Erfolg, sonst **401**.
        - Respektiert Retry/Throttle aus `.env`.
        """
    ),
    response_description='`{"status":"success"}` bei Erfolg.',
)
def login(data: WikiLoginRequest):
    """Authenticate against MediaWiki; session cookies persist in wiki_session."""
    # Fetch a login token (legacy direct call)
    try:
        token_resp = wiki_session.get(
            WIKI_API_URL,
            params={"action": "query", "meta": "tokens", "type": "login", "format": "json"},
            timeout=10
        )
        token_resp.raise_for_status()
        token = token_resp.json().get("query", {}).get("tokens", {}).get("logintoken")
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Token-Error: {e}")
    # NOTE(review): the token is fetched a second time via the retry-aware
    # helper and overwrites the value above — looks like merged old/new code
    # paths; confirm the legacy block can be removed.
    tok = _request_with_retry("GET", {"action": "query", "meta": "tokens", "type": "login", "format": "json"})
    token = tok.json().get("query", {}).get("tokens", {}).get("logintoken")
    if not token:
        raise HTTPException(status_code=502, detail="Kein Login-Token erhalten")
    # Preferred path: clientlogin
    try:
        login_resp = wiki_session.post(
            WIKI_API_URL,
            data={
                "action": "clientlogin",
                "format": "json",
                "username": data.username,
                "password": data.password,
                "logintoken": token,
                "loginreturnurl": "http://localhost:8000"
            },
            timeout=10
        )
        login_resp.raise_for_status()
        status = login_resp.json().get("clientlogin", {}).get("status")
    except Exception:
        # fall through to the legacy action=login path below
        status = None
    # Fallback: legacy action=login
    if status != "PASS":
        alt = wiki_session.post(
            WIKI_API_URL,
            data={"action": "login", "format": "json", "lgname": data.username, "lgpassword": data.password},
            timeout=10
        )
        alt.raise_for_status()
        status = alt.json().get("login", {}).get("result")
    if status in ("PASS", "Success"):
        return WikiLoginResponse(status="success", message=None)
    return WikiLoginResponse(status="failed", message="Login fehlgeschlagen")
|
||||
|
||||
# SMW-Ask: alle Übungen inkl. Unterkategorien
|
||||
@router.get("/semantic/pages")
|
||||
def semantic_pages(category: str = Query(..., description="Kategorie ohne 'Category:'")) -> Dict[str, Any]:
|
||||
smw_query = f"[[Category:{category}]]"
|
||||
ask_query = f"{smw_query}|limit=50000"
|
||||
r = wiki_session.get(
|
||||
WIKI_API_URL,
|
||||
params={"action": "ask", "query": ask_query, "format": "json"},
|
||||
timeout=30
|
||||
)
|
||||
# clientlogin (mit returnurl) + Fallback action=login
|
||||
try:
|
||||
r.raise_for_status()
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=502, detail=f"SMW-Ask-Error: {e}")
|
||||
return r.json().get("query", {}).get("results", {})
|
||||
cl = _request_with_retry("POST", {}, data={
|
||||
"action": "clientlogin",
|
||||
"format": "json",
|
||||
"username": data.username,
|
||||
"password": data.password,
|
||||
"logintoken": token,
|
||||
"loginreturnurl": "https://example.org/",
|
||||
})
|
||||
st = cl.json().get("clientlogin", {}).get("status")
|
||||
if st == "PASS":
|
||||
return WikiLoginResponse(status="success")
|
||||
except HTTPException:
|
||||
pass
|
||||
|
||||
# Wikitext über parse-Endpoint holen (per pageid)
|
||||
@router.get("/parsepage", response_model=PageContentResponse)
|
||||
def parse_page(pageid: int = Query(...), title: str = Query(None)):
|
||||
r = wiki_session.get(
|
||||
WIKI_API_URL,
|
||||
params={"action": "parse", "pageid": pageid, "prop": "wikitext", "format": "json"},
|
||||
timeout=20
|
||||
)
|
||||
try:
|
||||
r.raise_for_status()
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=502, detail=f"Parse-Error: {e}")
|
||||
wikitext = r.json().get("parse", {}).get("wikitext", {}).get("*", "")
|
||||
lg = _request_with_retry("POST", {}, data={
|
||||
"action": "login",
|
||||
"format": "json",
|
||||
"lgname": data.username,
|
||||
"lgpassword": data.password,
|
||||
"lgtoken": token,
|
||||
})
|
||||
res = lg.json().get("login", {}).get("result")
|
||||
if res == "Success":
|
||||
return WikiLoginResponse(status="success")
|
||||
raise HTTPException(status_code=401, detail=f"Login fehlgeschlagen: {res}")
|
||||
|
||||
|
||||
@router.post(
|
||||
"/login/env",
|
||||
response_model=WikiLoginResponse,
|
||||
summary="MediaWiki-Login mit .env-Credentials",
|
||||
description=dedent(
|
||||
"""
|
||||
Führt den Login mit **WIKI_BOT_USER/WIKI_BOT_PASSWORD** aus der Prozess-Umgebung durch.
|
||||
Praktisch für geplante Jobs/CLI ohne Übergabe im Body. Secrets werden **nie** im Klartext zurückgegeben.
|
||||
|
||||
**Voraussetzung**: Beide Variablen sind gesetzt (siehe `/import/wiki/meta/env/runtime`).
|
||||
"""
|
||||
),
|
||||
response_description='`{"status":"success"}` bei Erfolg.',
|
||||
openapi_extra={
|
||||
"x-codeSamples": [
|
||||
{"lang": "bash", "label": "curl", "source": "curl -s -X POST http://localhost:8000/import/wiki/login/env | jq ."}
|
||||
]
|
||||
},
|
||||
)
|
||||
def login_env():
|
||||
user = os.getenv("WIKI_BOT_USER")
|
||||
pwd = os.getenv("WIKI_BOT_PASSWORD")
|
||||
if not user or not pwd:
|
||||
raise HTTPException(status_code=400, detail="WIKI_BOT_USER/WIKI_BOT_PASSWORD nicht gesetzt")
|
||||
return login(WikiLoginRequest(username=user, password=pwd))
|
||||
|
||||
|
||||
@router.get(
|
||||
"/semantic/pages",
|
||||
summary="SMW-Ask-Ergebnisse einer Kategorie mit PageID/URL anreichern",
|
||||
description=dedent(
|
||||
"""
|
||||
Ruft Semantic MediaWiki via `action=ask` auf und liefert ein **Dictionary**: `{"Titel": {...}}`.
|
||||
Anschließend werden **alle** Titel batchweise via `prop=info` um `pageid` und `fullurl` ergänzt
|
||||
(berücksichtigt Redirects & Titel-Normalisierung). Große Kategorien werden in Chunks der Größe
|
||||
`WIKI_BATCH` verarbeitet. Throttling gemäß `WIKI_SLEEP_MS`.
|
||||
|
||||
**Rückgabe**
|
||||
- Key = Seitentitel
|
||||
- Value = ursprüngliche Ask-Daten **plus** `pageid` & `fullurl` (falls auflösbar)
|
||||
- Kann leeres Objekt `{}` sein (z. B. wenn Login erforderlich oder Kategorie leer).
|
||||
"""
|
||||
),
|
||||
response_description="Dictionary pro Titel; Felder `pageid/fullurl` sind evtl. nicht für alle Titel gesetzt.",
|
||||
openapi_extra={
|
||||
"x-codeSamples": [
|
||||
{"lang": "bash", "label": "curl", "source": "curl -s 'http://localhost:8000/import/wiki/semantic/pages?category=%C3%9Cbungen' | jq . | head"}
|
||||
]
|
||||
},
|
||||
)
|
||||
def semantic_pages(category: str = Query(..., description="Kategorie-Name **ohne** 'Category:' Präfix")) -> Dict[str, Any]:
|
||||
ask_query = f"[[Category:{category}]]|limit=50000"
|
||||
r = _request_with_retry("GET", {"action": "ask", "query": ask_query, "format": "json"})
|
||||
results = r.json().get("query", {}).get("results", {}) or {}
|
||||
titles = list(results.keys())
|
||||
|
||||
info_map = _fetch_pageinfo_batch(titles)
|
||||
|
||||
enriched: Dict[str, Any] = {}
|
||||
missing = 0
|
||||
for title, entry in results.items():
|
||||
base = entry if isinstance(entry, dict) else {}
|
||||
extra = info_map.get(title, {})
|
||||
if not extra:
|
||||
missing += 1
|
||||
enriched[title] = {
|
||||
**base,
|
||||
"pageid": extra.get("pageid", base.get("pageid")),
|
||||
"fullurl": extra.get("fullurl", base.get("fullurl")),
|
||||
}
|
||||
logger.info("/semantic/pages: %d Titel, %d ohne pageid nach Enrichment", len(results), missing)
|
||||
return enriched
|
||||
|
||||
|
||||
@router.get(
|
||||
"/parsepage",
|
||||
response_model=PageContentResponse,
|
||||
summary="Wikitext einer Seite per pageid holen",
|
||||
description=dedent(
|
||||
"""
|
||||
Liefert den **Roh-Wikitext** (`prop=wikitext`) zu einer Seite. Der optionale `title`-Parameter dient
|
||||
nur als Echo/Diagnose. Für strukturierte Extraktion (Infobox/Abschnitte) muss der Aufrufer den
|
||||
Wikitext selbst parsen.
|
||||
|
||||
**Besonderheiten**
|
||||
- Erfordert ggf. vorherigen Login (private Wikis).
|
||||
- Throttling/Retry gemäß `.env`.
|
||||
- Upstream-Fehler werden als **502** gemeldet.
|
||||
"""
|
||||
),
|
||||
response_description="Roh-Wikitext (keine HTML-Transformation).",
|
||||
openapi_extra={
|
||||
"x-codeSamples": [
|
||||
{"lang": "bash", "label": "curl", "source": "curl -s 'http://localhost:8000/import/wiki/parsepage?pageid=218&title=Affenklatschen' | jq ."}
|
||||
]
|
||||
},
|
||||
)
|
||||
def parse_page(pageid: int = Query(..., description="Numerische PageID der Seite"), title: str = Query(None, description="Optional: Seitentitel (nur Echo)")):
|
||||
resp = _request_with_retry("GET", {"action": "parse", "pageid": pageid, "prop": "wikitext", "format": "json"})
|
||||
wikitext = resp.json().get("parse", {}).get("wikitext", {}).get("*", "")
|
||||
return PageContentResponse(pageid=pageid, title=title or "", wikitext=wikitext)
|
||||
|
||||
# Pageinfo über Core-API (ermittelt pageid + fullurl)
|
||||
@router.get("/info", response_model=PageInfoResponse)
|
||||
def page_info(title: str = Query(..., description="Name der Seite")):
|
||||
r = wiki_session.get(
|
||||
WIKI_API_URL,
|
||||
params={"action": "query", "titles": title, "prop": "info", "inprop": "url", "format": "json"},
|
||||
timeout=10
|
||||
)
|
||||
try:
|
||||
r.raise_for_status()
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=502, detail=f"Info-Error: {e}")
|
||||
pages = r.json().get("query", {}).get("pages", {})
|
||||
pid_str, page = next(iter(pages.items()))
|
||||
pid = int(pid_str)
|
||||
fullurl = page.get("fullurl")
|
||||
return PageInfoResponse(pageid=pid, title=page.get("title"), fullurl=fullurl)
|
||||
|
||||
# Detail-Endpoint für eine Übung: Metadaten aus Ask + Wikitext & ID via Core-API
|
||||
@router.get("/semantic/page", response_model=Dict[str, Any])
|
||||
def semantic_page_detail(category: str = Query(...), title: str = Query(...)) -> Dict[str, Any]:
|
||||
# Metadaten aus SMW-Ask
|
||||
entries = semantic_pages(category)
|
||||
entry = entries.get(title)
|
||||
if not entry:
|
||||
raise HTTPException(status_code=404, detail="Übung nicht gefunden im SMW-Ask-Ergebnis.")
|
||||
# Pageinfo via Core-API
|
||||
info = page_info(title=title)
|
||||
# Wikitext via parse
|
||||
parsed = parse_page(pageid=info.pageid, title=title)
|
||||
return {
|
||||
"title": title,
|
||||
"pageid": info.pageid,
|
||||
"fullurl": info.fullurl,
|
||||
"printouts": entry.get("printouts", {}),
|
||||
"wikitext": parsed.wikitext
|
||||
}
|
||||
@router.get(
|
||||
"/info",
|
||||
response_model=PageInfoResponse,
|
||||
summary="PageID/URL zu einem Titel auflösen (inkl. Varianten)",
|
||||
description=dedent(
|
||||
"""
|
||||
Versucht zuerst eine **exakte** Auflösung des angegebenen Titels (mit `redirects=1`, `converttitles=1`).
|
||||
Falls nicht erfolgreich, werden **Titel-Varianten** getestet (Leerzeichen↔Unterstrich, Bindestrich↔Gedankenstrich).
|
||||
Bei Erfolg Rückgabe mit `pageid`, aufgelöstem `title` und `fullurl`. Andernfalls **404**.
|
||||
|
||||
**Typische Fälle**
|
||||
- Unterschiedliche Schreibweisen (z. B. "Yoko Geri" vs. "Yoko_Geri").
|
||||
- Redirect-Ketten → es wird der **kanonische** Titel/URL geliefert.
|
||||
"""
|
||||
),
|
||||
response_description="Erfolg: PageInfo. Fehler: 404 'Page not found: <title>'.",
|
||||
openapi_extra={
|
||||
"x-codeSamples": [
|
||||
{"lang": "bash", "label": "curl", "source": "curl -s 'http://localhost:8000/import/wiki/info?title=Affenklatschen' | jq ."}
|
||||
]
|
||||
},
|
||||
)
|
||||
def page_info(title: str = Query(..., description="Seitentitel (unscharf; Varianten werden versucht)")):
|
||||
res = _fetch_pageinfo_batch([title])
|
||||
if res.get(title):
|
||||
d = res[title]
|
||||
return PageInfoResponse(pageid=d["pageid"], title=title, fullurl=d.get("fullurl", ""))
|
||||
|
||||
for v in _normalize_variants(title):
|
||||
if v == title:
|
||||
continue
|
||||
res2 = _fetch_pageinfo_batch([v])
|
||||
if res2.get(v):
|
||||
d = res2[v]
|
||||
return PageInfoResponse(pageid=d["pageid"], title=v, fullurl=d.get("fullurl", ""))
|
||||
|
||||
raise HTTPException(status_code=404, detail=f"Page not found: {title}")
|
||||
|
|
|
|||
26
schemas/plan_sessions.json
Normal file
26
schemas/plan_sessions.json
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "https://example.local/schemas/plan_sessions.json",
|
||||
"title": "PlanSession",
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required": ["id", "plan_id", "executed_at", "location", "coach", "group_label", "feedback", "used_equipment"],
|
||||
"properties": {
|
||||
"id": { "type": "string" },
|
||||
"plan_id": { "type": "string" },
|
||||
"executed_at": { "type": "string", "format": "date-time" },
|
||||
"location": { "type": "string" },
|
||||
"coach": { "type": "string" },
|
||||
"group_label": { "type": "string" },
|
||||
"feedback": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required": ["rating", "notes"],
|
||||
"properties": {
|
||||
"rating": { "type": "integer", "minimum": 1, "maximum": 5 },
|
||||
"notes": { "type": "string" }
|
||||
}
|
||||
},
|
||||
"used_equipment": { "type": "array", "items": { "type": "string" } }
|
||||
}
|
||||
}
|
||||
40
schemas/plan_templates.json
Normal file
40
schemas/plan_templates.json
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "https://example.local/schemas/plan_templates.json",
|
||||
"title": "PlanTemplate",
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id", "name", "discipline", "age_group", "target_group", "total_minutes",
|
||||
"sections", "goals", "equipment_allowed", "created_by", "version"
|
||||
],
|
||||
"properties": {
|
||||
"id": { "type": "string" },
|
||||
"name": { "type": "string", "minLength": 1 },
|
||||
"discipline": { "type": "string" },
|
||||
"age_group": { "type": "string" },
|
||||
"target_group": { "type": "string" },
|
||||
"total_minutes": { "type": "integer", "minimum": 0 },
|
||||
"sections": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required": ["name", "target_minutes", "must_keywords", "forbid_keywords", "capability_targets"],
|
||||
"properties": {
|
||||
"name": { "type": "string" },
|
||||
"target_minutes": { "type": "integer", "minimum": 0 },
|
||||
"must_keywords": { "type": "array", "items": { "type": "string" }, "default": [] },
|
||||
"ideal_keywords": { "type": "array", "items": { "type": "string" }, "default": [] },
|
||||
"supplement_keywords": { "type": "array", "items": { "type": "string" }, "default": [] },
|
||||
"forbid_keywords": { "type": "array", "items": { "type": "string" }, "default": [] },
|
||||
"capability_targets": { "type": "object", "additionalProperties": { "type": "integer" } }
|
||||
}
|
||||
}
|
||||
},
|
||||
"goals": { "type": "array", "items": { "type": "string" } },
|
||||
"equipment_allowed": { "type": "array", "items": { "type": "string" } },
|
||||
"created_by": { "type": "string" },
|
||||
"version": { "type": "string" }
|
||||
}
|
||||
}
|
||||
53
schemas/plans.json
Normal file
53
schemas/plans.json
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "https://example.local/schemas/plans.json",
|
||||
"title": "Plan",
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id", "template_id", "title", "discipline", "age_group", "target_group",
|
||||
"total_minutes", "sections", "goals", "capability_summary", "created_by",
|
||||
"created_at", "source", "fingerprint"
|
||||
],
|
||||
"properties": {
|
||||
"id": { "type": "string" },
|
||||
"template_id": { "type": ["string", "null"] },
|
||||
"title": { "type": "string", "minLength": 1 },
|
||||
"discipline": { "type": "string" },
|
||||
"age_group": { "type": "string" },
|
||||
"target_group": { "type": "string" },
|
||||
"total_minutes": { "type": "integer", "minimum": 0 },
|
||||
"sections": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required": ["name", "items", "minutes"],
|
||||
"properties": {
|
||||
"name": { "type": "string" },
|
||||
"minutes": { "type": "integer", "minimum": 0 },
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"required": ["exercise_external_id", "duration", "why"],
|
||||
"properties": {
|
||||
"exercise_external_id": { "type": "string" },
|
||||
"duration": { "type": "integer", "minimum": 0 },
|
||||
"why": { "type": "string" }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"goals": { "type": "array", "items": { "type": "string" } },
|
||||
"capability_summary": { "type": "object", "additionalProperties": { "type": "integer" } },
|
||||
"novelty_against_last_n": { "type": ["number", "null"] },
|
||||
"fingerprint": { "type": "string" },
|
||||
"created_by": { "type": "string" },
|
||||
"created_at": { "type": "string", "format": "date-time" },
|
||||
"source": { "type": "string" }
|
||||
}
|
||||
}
|
||||
95
scripts/backfill_capability_facets.py
Normal file
95
scripts/backfill_capability_facets.py
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Backfill Capability-Facetten in Qdrant – v1.2
|
||||
Fix: beendet korrekt, wenn `next_page_offset` (offset) None ist.
|
||||
"""
|
||||
import os
|
||||
from typing import Dict, Any, List
|
||||
from qdrant_client import QdrantClient
|
||||
|
||||
COLL = os.getenv("EXERCISE_COLLECTION", "exercises")
|
||||
QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost")
|
||||
QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))
|
||||
BATCH = int(os.getenv("BACKFILL_BATCH", "256"))
|
||||
|
||||
|
||||
def _facet_capabilities(caps: Dict[str, Any]) -> Dict[str, List[str]]:
|
||||
caps = caps or {}
|
||||
def names_where(pred) -> List[str]:
|
||||
out = []
|
||||
for k, v in caps.items():
|
||||
try:
|
||||
iv = int(v)
|
||||
except Exception:
|
||||
iv = 0
|
||||
if pred(iv):
|
||||
s = str(k).strip()
|
||||
if s:
|
||||
out.append(s)
|
||||
return sorted({s for s in out}, key=str.casefold)
|
||||
|
||||
all_keys = sorted({str(k).strip() for k in caps.keys() if str(k).strip()}, key=str.casefold)
|
||||
return {
|
||||
"capability_keys": all_keys,
|
||||
"capability_ge1": names_where(lambda lv: lv >= 1),
|
||||
"capability_ge2": names_where(lambda lv: lv >= 2),
|
||||
"capability_ge3": names_where(lambda lv: lv >= 3),
|
||||
"capability_ge4": names_where(lambda lv: lv >= 4),
|
||||
"capability_ge5": names_where(lambda lv: lv >= 5),
|
||||
"capability_eq1": names_where(lambda lv: lv == 1),
|
||||
"capability_eq2": names_where(lambda lv: lv == 2),
|
||||
"capability_eq3": names_where(lambda lv: lv == 3),
|
||||
"capability_eq4": names_where(lambda lv: lv == 4),
|
||||
"capability_eq5": names_where(lambda lv: lv == 5),
|
||||
}
|
||||
|
||||
|
||||
def main() -> None:
|
||||
client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
|
||||
info = client.get_collection(COLL)
|
||||
print(f"[Backfill] Collection '{COLL}' ok – vectors={info.config.params.vectors}")
|
||||
|
||||
updated_total = 0
|
||||
offset = None
|
||||
page = 0
|
||||
|
||||
while True:
|
||||
points, next_offset = client.scroll(
|
||||
collection_name=COLL,
|
||||
scroll_filter=None,
|
||||
offset=offset,
|
||||
limit=BATCH,
|
||||
with_payload=True,
|
||||
)
|
||||
page += 1
|
||||
if not points:
|
||||
print("[Backfill] no more points – done")
|
||||
break
|
||||
|
||||
updated_page = 0
|
||||
for pt in points:
|
||||
pld = pt.payload or {}
|
||||
caps = pld.get("capabilities") or {}
|
||||
facets = _facet_capabilities(caps)
|
||||
|
||||
# nur setzen, wenn sich etwas ändert
|
||||
need = any(pld.get(k) != v for k, v in facets.items())
|
||||
if not need:
|
||||
continue
|
||||
client.set_payload(collection_name=COLL, points=[pt.id], payload=facets)
|
||||
updated_total += 1
|
||||
updated_page += 1
|
||||
|
||||
print(f"[Backfill] page={page} processed={len(points)} updated_page={updated_page} updated_total={updated_total}")
|
||||
|
||||
# Ende erreicht? Dann nach dieser Seite aussteigen.
|
||||
if next_offset is None:
|
||||
break
|
||||
offset = next_offset
|
||||
|
||||
print(f"[Backfill] done. total_updated={updated_total}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
62
scripts/bootstrap_qdrant_exercises.py
Normal file
62
scripts/bootstrap_qdrant_exercises.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Idempotenter Bootstrap für die Qdrant-Collection 'exercises':
|
||||
- Legt fehlende Payload-Indizes an (ohne Collection zu droppen)
|
||||
- Optional prüft Dimension/Distanz
|
||||
|
||||
Starten mit
|
||||
pip install qdrant-client --upgrade
|
||||
python3 scripts/bootstrap_qdrant_exercises.py
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import PayloadSchemaType
|
||||
|
||||
QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost")
|
||||
QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))
|
||||
COLLECTION = os.getenv("EXERCISE_COLLECTION", "exercises")
|
||||
|
||||
INDEX_SPECS = [
|
||||
("keywords", PayloadSchemaType.KEYWORD),
|
||||
("equipment", PayloadSchemaType.KEYWORD),
|
||||
("discipline", PayloadSchemaType.KEYWORD),
|
||||
("age_group", PayloadSchemaType.KEYWORD),
|
||||
("target_group", PayloadSchemaType.KEYWORD),
|
||||
("category", PayloadSchemaType.KEYWORD),
|
||||
("capability_keys", PayloadSchemaType.KEYWORD),
|
||||
("capability_ge1", PayloadSchemaType.KEYWORD),
|
||||
("capability_ge2", PayloadSchemaType.KEYWORD),
|
||||
("capability_ge3", PayloadSchemaType.KEYWORD),
|
||||
("capability_ge4", PayloadSchemaType.KEYWORD), # neu
|
||||
("capability_ge5", PayloadSchemaType.KEYWORD), # neu
|
||||
("capability_eq1", PayloadSchemaType.KEYWORD), # neu
|
||||
("capability_eq2", PayloadSchemaType.KEYWORD), # neu
|
||||
("capability_eq3", PayloadSchemaType.KEYWORD), # neu
|
||||
("capability_eq4", PayloadSchemaType.KEYWORD), # neu
|
||||
("capability_eq5", PayloadSchemaType.KEYWORD), # neu
|
||||
]
|
||||
|
||||
def main():
|
||||
client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
|
||||
# Sanity: Collection existiert?
|
||||
info = client.get_collection(COLLECTION) # wirf Fehler, wenn nicht vorhanden
|
||||
print(f"[Bootstrap] Collection '{COLLECTION}' gefunden. Vectors={info.config.params.vectors}")
|
||||
|
||||
# Indizes idempotent anlegen
|
||||
for field, schema in INDEX_SPECS:
|
||||
try:
|
||||
client.create_payload_index(collection_name=COLLECTION,
|
||||
field_name=field,
|
||||
field_schema=schema)
|
||||
print(f"[Bootstrap] Index created: {field} ({schema})")
|
||||
except Exception as e:
|
||||
# Qdrant wirft hier Exceptions, wenn der Index schon existiert – das ist ok
|
||||
print(f"[Bootstrap] Index exists or skipped: {field} -> {e}")
|
||||
|
||||
print("[Bootstrap] done.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
80
scripts/bootstrap_qdrant_plans.py
Normal file
80
scripts/bootstrap_qdrant_plans.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Bootstrap (idempotent) für Qdrant-Collections rund um Pläne – v1.3.0
|
||||
|
||||
- Fügt fehlende Payload-Indizes hinzu (KEYWORD/FLOAT), idempotent.
|
||||
- NEU: FLOAT-Index `plans.created_at_ts` für serverseitige Zeitfensterfilter.
|
||||
"""
|
||||
import os
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import PayloadSchemaType
|
||||
|
||||
QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost")
|
||||
QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))
|
||||
|
||||
PLANS = os.getenv("PLAN_COLLECTION") or os.getenv("QDRANT_COLLECTION_PLANS", "plans")
|
||||
TEMPLATES = os.getenv("PLAN_TEMPLATE_COLLECTION", "plan_templates")
|
||||
SESSIONS = os.getenv("PLAN_SESSION_COLLECTION", "plan_sessions")
|
||||
|
||||
INDEX_SPECS = {
|
||||
TEMPLATES: [
|
||||
("discipline", PayloadSchemaType.KEYWORD),
|
||||
("age_group", PayloadSchemaType.KEYWORD),
|
||||
("target_group", PayloadSchemaType.KEYWORD),
|
||||
("section_names", PayloadSchemaType.KEYWORD),
|
||||
("section_must_keywords", PayloadSchemaType.KEYWORD),
|
||||
("section_ideal_keywords", PayloadSchemaType.KEYWORD),
|
||||
("section_supplement_keywords", PayloadSchemaType.KEYWORD),
|
||||
("section_forbid_keywords", PayloadSchemaType.KEYWORD),
|
||||
("goals", PayloadSchemaType.KEYWORD),
|
||||
],
|
||||
PLANS: [
|
||||
("discipline", PayloadSchemaType.KEYWORD),
|
||||
("age_group", PayloadSchemaType.KEYWORD),
|
||||
("target_group", PayloadSchemaType.KEYWORD),
|
||||
("sections.name", PayloadSchemaType.KEYWORD), # legacy, belassen
|
||||
("plan_section_names", PayloadSchemaType.KEYWORD),
|
||||
("goals", PayloadSchemaType.KEYWORD),
|
||||
("created_by", PayloadSchemaType.KEYWORD),
|
||||
("created_at", PayloadSchemaType.KEYWORD),
|
||||
("created_at_ts", PayloadSchemaType.FLOAT), # NEU
|
||||
("fingerprint", PayloadSchemaType.KEYWORD),
|
||||
("title", PayloadSchemaType.KEYWORD),
|
||||
],
|
||||
SESSIONS: [
|
||||
("plan_id", PayloadSchemaType.KEYWORD),
|
||||
("executed_at", PayloadSchemaType.KEYWORD),
|
||||
("coach", PayloadSchemaType.KEYWORD),
|
||||
("group_label", PayloadSchemaType.KEYWORD),
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def _create_indexes(client: QdrantClient, collection: str, specs):
|
||||
try:
|
||||
client.get_collection(collection)
|
||||
print(f"[Bootstrap v1.3.0] Collection '{collection}' ok.")
|
||||
except Exception as e:
|
||||
print(f"[Bootstrap v1.3.0] WARN: Collection '{collection}' nicht gefunden (wird beim ersten Upsert erstellt). Details: {e}")
|
||||
return
|
||||
|
||||
for field, schema in specs:
|
||||
try:
|
||||
client.create_payload_index(collection_name=collection, field_name=field, field_schema=schema)
|
||||
print(f"[Bootstrap v1.3.0] Index created: {collection}.{field} ({schema})")
|
||||
except Exception as e:
|
||||
print(f"[Bootstrap v1.3.0] Index exists or skipped: {collection}.{field} -> {e}")
|
||||
|
||||
|
||||
def main():
|
||||
print(f"[Bootstrap v1.3.0] Qdrant @ {QDRANT_HOST}:{QDRANT_PORT}")
|
||||
print(f"[Bootstrap v1.3.0] Collections: TEMPLATES={TEMPLATES} PLANS={PLANS} SESSIONS={SESSIONS}")
|
||||
client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
|
||||
for coll, specs in INDEX_SPECS.items():
|
||||
_create_indexes(client, coll, specs)
|
||||
print("[Bootstrap v1.3.0] done.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
255
scripts/test_llm_api.py
Normal file
255
scripts/test_llm_api.py
Normal file
|
|
@ -0,0 +1,255 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Erweiterter API-Schnelltest - v2.1
|
||||
|
||||
Änderungen ggü. v2.0:
|
||||
- **Login VOR** /semantic/pages (viele Wikis liefern Ask-Ergebnisse nur authentifiziert)
|
||||
- Robuster Fallback: Wenn /semantic/pages {} liefert → optionaler ENV-Fallback `TEST_WIKI_TITLE`
|
||||
- Bessere Diagnoseausgaben (zeigt Basis-URL, Category, gesetzte Login-ENV)
|
||||
|
||||
ENV:
|
||||
BASE_URL (default http://127.0.0.1:8000)
|
||||
TEST_WIKI_CATEGORY (default "Übungen")
|
||||
TEST_WIKI_TITLE (optional Fallback-Titel, z. B. "Affenklatschen")
|
||||
WIKI_BOT_USER, WIKI_BOT_PASSWORD (optional; empfohlen)
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import uuid
|
||||
import requests
|
||||
|
||||
BASE = os.getenv("BASE_URL", "http://127.0.0.1:8000").rstrip("/")
|
||||
TEST_WIKI_CATEGORY = os.getenv("TEST_WIKI_CATEGORY", "Übungen")
|
||||
TEST_WIKI_TITLE = os.getenv("TEST_WIKI_TITLE")
|
||||
WIKI_USER = os.getenv("WIKI_BOT_USER")
|
||||
WIKI_PASS = os.getenv("WIKI_BOT_PASSWORD")
|
||||
|
||||
print("\nConfig:")
|
||||
print(" BASE_URL=", BASE)
|
||||
print(" TEST_WIKI_CATEGORY=", TEST_WIKI_CATEGORY)
|
||||
print(" TEST_WIKI_TITLE=", TEST_WIKI_TITLE or "<none>")
|
||||
print(" WIKI_BOT_USER set=", bool(WIKI_USER))
|
||||
|
||||
# ---- helpers ----
|
||||
|
||||
def fail(msg, resp: requests.Response | None = None):
|
||||
print("✗", msg)
|
||||
if resp is not None:
|
||||
try:
|
||||
print(" → status:", resp.status_code)
|
||||
print(" → body:", resp.text[:800])
|
||||
except Exception:
|
||||
pass
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def ok(msg):
|
||||
print("✓", msg)
|
||||
|
||||
|
||||
# ---- basic ----
|
||||
|
||||
def test_openapi():
|
||||
r = requests.get(f"{BASE}/openapi.json")
|
||||
if r.status_code != 200:
|
||||
fail("/openapi.json nicht erreichbar", r)
|
||||
ok("OpenAPI erreichbar")
|
||||
|
||||
|
||||
# ---- wiki ----
|
||||
|
||||
def test_wiki_health():
|
||||
r = requests.get(f"{BASE}/import/wiki/health", params={"verbose": 1})
|
||||
if r.status_code != 200 or r.json().get("status") != "ok":
|
||||
fail("/import/wiki/health fehlgeschlagen", r)
|
||||
ok("Wiki /health ok")
|
||||
|
||||
|
||||
def test_wiki_login_if_env():
|
||||
if not (WIKI_USER and WIKI_PASS):
|
||||
print("⚠️ Wiki /login übersprungen (WIKI_BOT_USER/PASSWORD nicht gesetzt)")
|
||||
return False
|
||||
r = requests.post(f"{BASE}/import/wiki/login", json={"username": WIKI_USER, "password": WIKI_PASS})
|
||||
if r.status_code != 200 or r.json().get("status") != "success":
|
||||
fail("/import/wiki/login fehlgeschlagen", r)
|
||||
ok("Wiki /login ok")
|
||||
return True
|
||||
|
||||
|
||||
def get_wiki_title_and_pageid() -> tuple[str, int]:
|
||||
# 1) Versuch über semantic/pages
|
||||
r = requests.get(f"{BASE}/import/wiki/semantic/pages", params={"category": TEST_WIKI_CATEGORY})
|
||||
if r.status_code != 200:
|
||||
fail("/import/wiki/semantic/pages fehlgeschlagen", r)
|
||||
data = r.json()
|
||||
if isinstance(data, dict) and data:
|
||||
title = next(iter(data.keys()))
|
||||
entry = data[title]
|
||||
pid = entry.get("pageid")
|
||||
if not pid:
|
||||
fail("/semantic/pages ohne pageid nach Enrichment", r)
|
||||
ok(f"Wiki /semantic/pages ok – Beispiel: '{title}' (pageid={pid})")
|
||||
return title, int(pid)
|
||||
|
||||
print("ℹ️ /semantic/pages lieferte keine Titel ({}). Versuche Fallback...")
|
||||
# 2) Fallback über TEST_WIKI_TITLE
|
||||
if not TEST_WIKI_TITLE:
|
||||
fail("Kein TEST_WIKI_TITLE gesetzt und /semantic/pages leer – kann keinen Titel testen.")
|
||||
r2 = requests.get(f"{BASE}/import/wiki/info", params={"title": TEST_WIKI_TITLE})
|
||||
if r2.status_code != 200:
|
||||
fail("/import/wiki/info(Fallback) fehlgeschlagen", r2)
|
||||
js = r2.json()
|
||||
pid = js.get("pageid")
|
||||
if not pid:
|
||||
fail("/info(Fallback) ohne pageid", r2)
|
||||
ok(f"Wiki /info Fallback ok – '{TEST_WIKI_TITLE}' (pageid={pid})")
|
||||
return TEST_WIKI_TITLE, int(pid)
|
||||
|
||||
|
||||
def test_wiki_info(title: str):
|
||||
r = requests.get(f"{BASE}/import/wiki/info", params={"title": title})
|
||||
if r.status_code != 200:
|
||||
fail("/import/wiki/info fehlgeschlagen", r)
|
||||
js = r.json()
|
||||
if not js.get("pageid"):
|
||||
fail("/info ohne pageid", r)
|
||||
ok("Wiki /info ok")
|
||||
|
||||
|
||||
def test_wiki_parse(pageid: int, title: str):
|
||||
r = requests.get(f"{BASE}/import/wiki/parsepage", params={"pageid": pageid, "title": title})
|
||||
if r.status_code != 200:
|
||||
fail("/import/wiki/parsepage fehlgeschlagen", r)
|
||||
js = r.json()
|
||||
if not isinstance(js.get("wikitext"), str):
|
||||
fail("/parsepage ohne wikitext", r)
|
||||
ok("Wiki /parsepage ok")
|
||||
|
||||
|
||||
# ---- exercise ----
|
||||
|
||||
def make_exercise_payload(external_id: str):
|
||||
return {
|
||||
"external_id": external_id,
|
||||
"fingerprint": "unit-test-sha",
|
||||
"source": "UnitTest",
|
||||
"title": "Testübung Reaktion",
|
||||
"summary": "Kurzbeschreibung für Test.",
|
||||
"short_description": "Kurzbeschreibung für Test.",
|
||||
"keywords": ["Reaktion", "Bälle", "Bälle"],
|
||||
"link": "http://example.local",
|
||||
"discipline": "Karate",
|
||||
"group": "5",
|
||||
"age_group": "Teenager",
|
||||
"target_group": "Breitensport",
|
||||
"min_participants": 1,
|
||||
"duration_minutes": 10,
|
||||
"capabilities": {"Reaktionsfähigkeit": 2, "Kopplungsfähigkeit": 1},
|
||||
"category": "Übungen",
|
||||
"purpose": "Aufwärmen",
|
||||
"execution": "Einfacher Ablauf.",
|
||||
"notes": "Hinweise.",
|
||||
"preparation": "Bälle holen.",
|
||||
"method": "Frontale Methode",
|
||||
"equipment": ["Bälle", "Pratze"]
|
||||
}
|
||||
|
||||
|
||||
def test_exercise_upsert_and_idempotence():
|
||||
import time
|
||||
ext = f"ut:{uuid.uuid4()}"
|
||||
payload = make_exercise_payload(ext)
|
||||
|
||||
# create
|
||||
r1 = requests.post(f"{BASE}/exercise", json=payload)
|
||||
if r1.status_code != 200:
|
||||
fail("POST /exercise (create) fehlgeschlagen", r1)
|
||||
id1 = r1.json().get("id")
|
||||
if not id1:
|
||||
fail("POST /exercise lieferte keine id", r1)
|
||||
|
||||
# update (idempotent)
|
||||
r2 = requests.post(f"{BASE}/exercise", json=payload)
|
||||
if r2.status_code != 200:
|
||||
fail("POST /exercise (update) fehlgeschlagen", r2)
|
||||
id2 = r2.json().get("id")
|
||||
if id2 != id1:
|
||||
fail("Idempotenz verletzt: id hat sich geändert")
|
||||
|
||||
# by-external-id
|
||||
r3 = requests.get(f"{BASE}/exercise/by-external-id", params={"external_id": ext})
|
||||
if r3.status_code != 200:
|
||||
fail("GET /exercise/by-external-id fehlgeschlagen", r3)
|
||||
id3 = r3.json().get("id")
|
||||
if id3 != id1:
|
||||
fail("Lookup by external_id liefert andere id")
|
||||
|
||||
ok("Exercise Upsert & Idempotenz ok")
|
||||
return ext, id1
|
||||
|
||||
|
||||
def test_exercise_search_filter(ext_id: str):
|
||||
req = {
|
||||
"discipline": "Karate",
|
||||
"equipment_all": ["Bälle"],
|
||||
"capability_names": ["Reaktionsfähigkeit"],
|
||||
"capability_ge_level": 2,
|
||||
"limit": 10,
|
||||
}
|
||||
r = requests.post(f"{BASE}/exercise/search", json=req)
|
||||
if r.status_code != 200:
|
||||
fail("POST /exercise/search (Filter) fehlgeschlagen", r)
|
||||
js = r.json(); hits = js.get("hits", [])
|
||||
if not isinstance(hits, list) or not hits:
|
||||
fail("/exercise/search liefert keine Treffer (Filter)", r)
|
||||
if not any(h.get("payload", {}).get("external_id") == ext_id for h in hits):
|
||||
ok("Exercise Search (Filter) ok – Testpunkt nicht zwingend in Top-N")
|
||||
else:
|
||||
ok("Exercise Search (Filter) ok – Testpunkt gefunden")
|
||||
|
||||
|
||||
def test_exercise_search_vector():
|
||||
req = {
|
||||
"query": "Aufwärmen 10min, Reaktionsfähigkeit, Teenager, Bälle",
|
||||
"discipline": "Karate",
|
||||
"limit": 5
|
||||
}
|
||||
r = requests.post(f"{BASE}/exercise/search", json=req)
|
||||
if r.status_code != 200:
|
||||
fail("POST /exercise/search (Vector) fehlgeschlagen", r)
|
||||
js = r.json(); hits = js.get("hits", [])
|
||||
if not isinstance(hits, list):
|
||||
fail("/exercise/search (Vector) liefert ungültige Struktur", r)
|
||||
ok(f"Exercise Search (Vector) ok – {len(hits)} Treffer")
|
||||
|
||||
|
||||
def test_exercise_delete(ext_id: str):
|
||||
r = requests.delete(f"{BASE}/exercise/delete-by-external-id", params={"external_id": ext_id})
|
||||
if r.status_code != 200:
|
||||
fail("DELETE /exercise/delete-by-external-id fehlgeschlagen", r)
|
||||
ok("Exercise Delete-by-external-id ok")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("\nStarte API-Tests...\n")
|
||||
test_openapi()
|
||||
test_wiki_health()
|
||||
|
||||
# Login nun VOR Semantic Query
|
||||
did_login = test_wiki_login_if_env()
|
||||
|
||||
# Titel ermitteln (semantic/pages oder Fallback)
|
||||
title, pageid = get_wiki_title_and_pageid()
|
||||
test_wiki_info(title)
|
||||
test_wiki_parse(pageid, title)
|
||||
|
||||
# Exercise
|
||||
ext, _id = test_exercise_upsert_and_idempotence()
|
||||
test_exercise_search_filter(ext)
|
||||
test_exercise_search_vector()
|
||||
test_exercise_delete(ext)
|
||||
|
||||
print("\n🎉 Alle Tests erfolgreich durchlaufen!\n")
|
||||
241
scripts/test_plans_wp15.py
Normal file
241
scripts/test_plans_wp15.py
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Unit-, Integrations- und E2E-Tests für WP-15 (v1.2.0)."""
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
import pytest
|
||||
from datetime import datetime, timezone
|
||||
|
||||
BASE = os.getenv("BASE_URL", "http://127.0.0.1:8000").rstrip("/")
|
||||
QDRANT = os.getenv("QDRANT_BASE", "http://127.0.0.1:6333").rstrip("/")
|
||||
TPL_COLL = os.getenv("PLAN_TEMPLATE_COLLECTION", "plan_templates")
|
||||
|
||||
# ---------- Helpers ----------
|
||||
|
||||
def _fp_local(plan_payload: dict) -> str:
|
||||
import hashlib
|
||||
core = {
|
||||
"title": plan_payload["title"],
|
||||
"total_minutes": int(plan_payload["total_minutes"]),
|
||||
"items": [
|
||||
{"exercise_external_id": it["exercise_external_id"], "duration": int(it["duration"])}
|
||||
for sec in plan_payload["sections"]
|
||||
for it in sec.get("items", [])
|
||||
],
|
||||
}
|
||||
raw = json.dumps(core, sort_keys=True, ensure_ascii=False)
|
||||
return hashlib.sha256(raw.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def _unique_len_lower(xs):
|
||||
seen = set()
|
||||
out = []
|
||||
for x in xs:
|
||||
k = x.casefold()
|
||||
if k not in seen:
|
||||
seen.add(k)
|
||||
out.append(x)
|
||||
return len(out)
|
||||
|
||||
# ---------- Unit ----------
|
||||
|
||||
def test_fingerprint_unit_v12():
|
||||
p = {
|
||||
"title": "Montag – Reaktion",
|
||||
"total_minutes": 90,
|
||||
"sections": [
|
||||
{"name": "Warmup", "minutes": 15, "items": [
|
||||
{"exercise_external_id": "ex:001", "duration": 10, "why": "Aufwärmen"}
|
||||
]}
|
||||
],
|
||||
}
|
||||
fp1 = _fp_local(p)
|
||||
p2 = json.loads(json.dumps(p, ensure_ascii=False))
|
||||
fp2 = _fp_local(p2)
|
||||
assert fp1 == fp2
|
||||
|
||||
# ---------- Integration: Templates mit mehreren Sections + ideal/supplement ----------
|
||||
|
||||
def test_template_sections_ideal_supplement_roundtrip():
|
||||
tpl = {
|
||||
"name": "Std 90 v1.1",
|
||||
"discipline": "Karate",
|
||||
"age_group": "Teenager",
|
||||
"target_group": "Breitensport",
|
||||
"total_minutes": 90,
|
||||
"sections": [
|
||||
{
|
||||
"name": "Warmup",
|
||||
"target_minutes": 15,
|
||||
"must_keywords": ["Reaktion", "reaktion"], # Duplikat in anderer Schreibweise
|
||||
"ideal_keywords": ["Koordination", "koordination"], # Duplikat in anderer Schreibweise
|
||||
"supplement_keywords": ["Teamspiel", " Teamspiel "], # Duplikat mit Whitespace
|
||||
"forbid_keywords": [],
|
||||
"capability_targets": {"Reaktionsfähigkeit": 2, "Mobilität": 1}
|
||||
},
|
||||
{
|
||||
"name": "Technikblock",
|
||||
"target_minutes": 30,
|
||||
"must_keywords": ["Mae-Geri"],
|
||||
"ideal_keywords": ["Timing"],
|
||||
"supplement_keywords": ["Partnerarbeit"],
|
||||
"forbid_keywords": ["Bodenarbeit"],
|
||||
"capability_targets": {"Technikpräzision": 2, "Schnelligkeit": 1}
|
||||
}
|
||||
],
|
||||
"goals": ["Technik", "Kondition"],
|
||||
"equipment_allowed": ["Bälle"],
|
||||
"created_by": "tester",
|
||||
"version": "1.1"
|
||||
}
|
||||
|
||||
r = requests.post(f"{BASE}/plan_templates", json=tpl)
|
||||
assert r.status_code == 200, r.text
|
||||
tpl_id = r.json()["id"]
|
||||
|
||||
r2 = requests.get(f"{BASE}/plan_templates/{tpl_id}")
|
||||
assert r2.status_code == 200, r2.text
|
||||
got = r2.json()
|
||||
|
||||
# Prüfen: Beide Sections vorhanden
|
||||
assert len(got["sections"]) == 2
|
||||
|
||||
s1 = got["sections"][0]
|
||||
# Normalisierung: Duplikate entfernt (case-insensitive), Whitespace getrimmt
|
||||
assert _unique_len_lower(s1["must_keywords"]) == len(s1["must_keywords"]) == 1
|
||||
assert _unique_len_lower(s1["ideal_keywords"]) == len(s1["ideal_keywords"]) == 1
|
||||
assert _unique_len_lower(s1["supplement_keywords"]) == len(s1["supplement_keywords"]) == 1
|
||||
|
||||
# ---------- Optional: Qdrant-Payload-Check (materialisierte Felder) ----------
|
||||
|
||||
def test_qdrant_materialized_fields_template():
|
||||
"""
|
||||
Robust: Erst frisches Template anlegen (mit neuen Feldern), dann Qdrant-Scroll
|
||||
mit Filter auf genau diese Template-ID. So treffen wir sicher einen Punkt,
|
||||
der die materialisierten Felder enthält – unabhängig von älteren Datensätzen.
|
||||
"""
|
||||
# 1) Frisches Template erzeugen (mit ideal/supplement)
|
||||
tpl = {
|
||||
"name": "Std 90 v1.1 – materialized-check",
|
||||
"discipline": "Karate",
|
||||
"age_group": "Teenager",
|
||||
"target_group": "Breitensport",
|
||||
"total_minutes": 90,
|
||||
"sections": [
|
||||
{
|
||||
"name": "Warmup",
|
||||
"target_minutes": 15,
|
||||
"must_keywords": ["Reaktion"],
|
||||
"ideal_keywords": ["Koordination"],
|
||||
"supplement_keywords": ["Teamspiel"],
|
||||
"forbid_keywords": [],
|
||||
"capability_targets": {"Reaktionsfähigkeit": 2}
|
||||
}
|
||||
],
|
||||
"goals": ["Technik"],
|
||||
"equipment_allowed": ["Bälle"],
|
||||
"created_by": "tester",
|
||||
"version": "1.1"
|
||||
}
|
||||
r = requests.post(f"{BASE}/plan_templates", json=tpl)
|
||||
assert r.status_code == 200, r.text
|
||||
tpl_id = r.json()["id"]
|
||||
|
||||
# 2) Qdrant gezielt nach genau diesem Punkt scrollen (Payload enthält id)
|
||||
try:
|
||||
rq = requests.post(
|
||||
f"{QDRANT}/collections/{TPL_COLL}/points/scroll",
|
||||
json={
|
||||
"with_payload": True,
|
||||
"limit": 1,
|
||||
"filter": {"must": [{"key": "id", "match": {"value": tpl_id}}]}
|
||||
},
|
||||
timeout=2.0,
|
||||
)
|
||||
except Exception:
|
||||
pytest.skip("Qdrant nicht erreichbar – überspringe materialisierte Feldprüfung")
|
||||
|
||||
if rq.status_code != 200:
|
||||
pytest.skip(f"Qdrant-Scroll liefert {rq.status_code}")
|
||||
|
||||
js = rq.json()
|
||||
pts = (js.get("result") or {}).get("points") or []
|
||||
if not pts:
|
||||
pytest.skip("Keine Übereinstimmung in plan_templates – überspringe")
|
||||
|
||||
payload = pts[0].get("payload") or {}
|
||||
for key in [
|
||||
"section_names",
|
||||
"section_must_keywords",
|
||||
"section_ideal_keywords",
|
||||
"section_supplement_keywords",
|
||||
"section_forbid_keywords",
|
||||
]:
|
||||
assert key in payload
|
||||
assert isinstance(payload[key], list)
|
||||
|
||||
# ---------- E2E: Plan anlegen + Idempotenz trotz variierender Nebenfelder ----------
|
||||
|
||||
def test_plan_e2e_idempotence_same_fingerprint():
|
||||
# Template minimal für Bezug
|
||||
tpl = {
|
||||
"name": "Std 90 for plan",
|
||||
"discipline": "Karate",
|
||||
"age_group": "Teenager",
|
||||
"target_group": "Breitensport",
|
||||
"total_minutes": 90,
|
||||
"sections": [
|
||||
{"name": "Warmup", "target_minutes": 15, "must_keywords": [], "forbid_keywords": [], "capability_targets": {}}
|
||||
],
|
||||
"goals": [],
|
||||
"equipment_allowed": [],
|
||||
"created_by": "tester",
|
||||
"version": "1.0"
|
||||
}
|
||||
r1 = requests.post(f"{BASE}/plan_templates", json=tpl)
|
||||
assert r1.status_code == 200
|
||||
tpl_id = r1.json()["id"]
|
||||
|
||||
plan_base = {
|
||||
"template_id": tpl_id,
|
||||
"title": "KW32 – Montag",
|
||||
"discipline": "Karate",
|
||||
"age_group": "Teenager",
|
||||
"target_group": "Breitensport",
|
||||
"total_minutes": 90,
|
||||
"sections": [
|
||||
{"name": "Warmup", "minutes": 15, "items": [
|
||||
{"exercise_external_id": "ex:001", "duration": 10, "why": "Aufwärmen"}
|
||||
]}
|
||||
],
|
||||
"created_by": "tester",
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
"source": "test"
|
||||
}
|
||||
|
||||
# Plan A mit goals/capability_summary
|
||||
plan_a = dict(plan_base)
|
||||
plan_a.update({
|
||||
"goals": ["Technik"],
|
||||
"capability_summary": {"Reaktionsfähigkeit": 2}
|
||||
})
|
||||
r2 = requests.post(f"{BASE}/plan", json=plan_a)
|
||||
assert r2.status_code == 200, r2.text
|
||||
plan_id = r2.json()["id"]
|
||||
|
||||
# Plan B – gleicher Fingerprint (gleiche items), aber andere Nebenfelder
|
||||
plan_b = dict(plan_base)
|
||||
plan_b.update({
|
||||
"goals": ["Kondition"],
|
||||
"capability_summary": {"Reaktionsfähigkeit": 3}
|
||||
})
|
||||
r3 = requests.post(f"{BASE}/plan", json=plan_b)
|
||||
assert r3.status_code == 200
|
||||
assert r3.json()["id"] == plan_id, "Idempotenz verletzt: gleicher Fingerprint muss gleiche ID liefern"
|
||||
|
||||
# GET prüfen
|
||||
r4 = requests.get(f"{BASE}/plan/{plan_id}")
|
||||
assert r4.status_code == 200
|
||||
js = r4.json()
|
||||
assert js["title"] == "KW32 – Montag"
|
||||
|
|
@ -1,183 +1,436 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Module: wiki_importer.py
|
||||
Beschreibung:
|
||||
- Importiert Übungen aus dem MediaWiki via FastAPI wiki_router
|
||||
- Führt vor dem Import einen Login gegen /import/wiki/login durch (falls nicht via --skip-login deaktiviert)
|
||||
- Holt Liste aller Übungs-Titel (SMW-Ask) via `/semantic/pages`
|
||||
- Für jede Übung:
|
||||
* Fetch pageinfo (pageid, fullurl) via `/info`
|
||||
* Parse Wikitext (Templates: ÜbungInfoBox, Übungsbeschreibung, SkillDevelopment) via `/parsepage`
|
||||
* Baut Payload entsprechend Exercise-Datenmodell
|
||||
* POST an `/exercise` Endpoint (exercise_router)
|
||||
- Unterstützt Single-Import via `--title` (oder ENV `WIKI_EXERCISE_TITLE`) und Full-Import via `--all`
|
||||
- Optional: Credentials via CLI (--username/--password) oder `.env` (WIKI_BOT_USER / WIKI_BOT_PASSWORD)
|
||||
wiki_importer.py – v2.3.8
|
||||
|
||||
Version: 2.1.0
|
||||
Ziel dieses Patches: Die Felder `discipline`, `execution`, `keywords`, `equipment`, `duration_minutes` usw.
|
||||
kommen bei dir teilweise leer an. Ursache sind zu aggressive Normalisierungen/Matcher.
|
||||
|
||||
Fix (konservativ & robust):
|
||||
- Parser liest jetzt **gezielt** die bekannten Templates **ohne** Over-Normalisierung:
|
||||
• `{{ÜbungInfoBox}}` / `{{UebungInfoBox}}`
|
||||
• `{{Übungsbeschreibung}}` / `{{Uebungsbeschreibung}}`
|
||||
• `{{Hilfsmittel}}`
|
||||
• `{{SkillDevelopment}}`
|
||||
- Feld-Extraktion nutzt **zuerst die exakten Wiki-Parameternamen** (deutsch/mit Umlauten),
|
||||
erst danach schmale Synonym-Fallbacks. Das stellt sicher, dass z. B. `Schlüsselworte=`
|
||||
wirklich in `keywords` landet.
|
||||
- `imported_at` wird bei **Create und Update** gesetzt.
|
||||
- Optionales Debugging: `--debug-raw` druckt die gefundenen Raw-Keys (einfach, nachvollziehbar).
|
||||
|
||||
Bestehende API-Endpunkte bleiben unverändert.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
from typing import Dict, Any
|
||||
from typing import Dict, Any, Tuple, Optional, List
|
||||
import requests
|
||||
import mwparserfromhell
|
||||
from dotenv import load_dotenv
|
||||
import hashlib
|
||||
import json
|
||||
import time
|
||||
|
||||
# ----- Konfiguration / Defaults -----
|
||||
load_dotenv() # .env laden, falls vorhanden
|
||||
load_dotenv()
|
||||
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000/import/wiki")
|
||||
EXERCISE_API = os.getenv("EXERCISE_API_URL", "http://localhost:8000/exercise")
|
||||
DEFAULT_CAT = os.getenv("WIKI_CATEGORY", "Übungen")
|
||||
DEFAULT_TITLE = os.getenv("WIKI_EXERCISE_TITLE", "Affenklatschen")
|
||||
REQUEST_TIMEOUT = int(os.getenv("REQUEST_TIMEOUT", "60"))
|
||||
|
||||
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000/import/wiki") # FastAPI-Wiki-Proxy
|
||||
EXERCISE_API = os.getenv("EXERCISE_API_URL", "http://localhost:8000/exercise") # Exercise-Endpoint
|
||||
DEFAULT_CAT = os.getenv("WIKI_CATEGORY", "Übungen")
|
||||
DEFAULT_TITLE = os.getenv("WIKI_EXERCISE_TITLE", "Affenklatschen")
|
||||
# ----- Helpers für Wiki-Router -----
|
||||
|
||||
# ---- Hilfsfunktionen für Wiki-Router ----
|
||||
def wiki_health() -> None:
|
||||
r = requests.get(f"{API_BASE_URL}/health", timeout=15)
|
||||
r.raise_for_status()
|
||||
print("[Sanity] Wiki health OK")
|
||||
|
||||
|
||||
def wiki_login(username: str, password: str) -> None:
|
||||
"""
|
||||
Führt einen Login gegen den wiki_router durch.
|
||||
Erwartet: {"status":"success"} bei Erfolg.
|
||||
"""
|
||||
payload = {"username": username, "password": password}
|
||||
r = requests.post(f"{API_BASE_URL}/login", json=payload, timeout=30)
|
||||
# kein raise_for_status(), wir wollen die JSON-Fehler sauber ausgeben
|
||||
try:
|
||||
data = r.json()
|
||||
except Exception:
|
||||
print(f"[Login] HTTP {r.status_code}: {r.text}")
|
||||
r.raise_for_status()
|
||||
|
||||
status = (data or {}).get("status")
|
||||
if status != "success":
|
||||
msg = (data or {}).get("message", "Login fehlgeschlagen")
|
||||
raise RuntimeError(f"[Login] {msg}")
|
||||
print("[Login] success")
|
||||
|
||||
|
||||
def fetch_all_pages(category: str) -> Dict[str, Any]:
|
||||
resp = requests.get(f"{API_BASE_URL}/semantic/pages", params={"category": category}, timeout=60)
|
||||
resp = requests.get(f"{API_BASE_URL}/semantic/pages", params={"category": category}, timeout=REQUEST_TIMEOUT)
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
|
||||
def fetch_page_info(title: str) -> Dict[str, Any]:
|
||||
r = requests.get(f"{API_BASE_URL}/info", params={"title": title}, timeout=30)
|
||||
r.raise_for_status()
|
||||
info = r.json()
|
||||
return {"pageid": info.get("pageid"), "fullurl": info.get("fullurl")}
|
||||
|
||||
# ----- Parser (konservativ) -----
|
||||
|
||||
T_INFOS = {"ÜbungInfoBox", "UebungInfoBox"}
|
||||
T_BESCHR = {"Übungsbeschreibung", "Uebungsbeschreibung"}
|
||||
T_HILFS = {"Hilfsmittel"}
|
||||
T_SKILL = {"SkillDevelopment"}
|
||||
|
||||
|
||||
def parse_exercise(title: str, pageid: int) -> Dict[str, Any]:
|
||||
print(f"[Parse] Lade '{title}' (ID={pageid})")
|
||||
resp = requests.get(
|
||||
f"{API_BASE_URL}/parsepage",
|
||||
params={"pageid": pageid, "title": title},
|
||||
timeout=60
|
||||
timeout=REQUEST_TIMEOUT,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
wikitext = resp.json().get("wikitext", "")
|
||||
wikicode = mwparserfromhell.parse(wikitext)
|
||||
|
||||
raw: Dict[str, Any] = {"title": title, "source": "MediaWiki", "pageid": pageid}
|
||||
raw: Dict[str, Any] = {"title": title, "source": "MediaWiki", "pageid": pageid, "wikitext": wikitext}
|
||||
|
||||
for tpl in wikicode.filter_templates():
|
||||
name = str(tpl.name).strip()
|
||||
if name == "ÜbungInfoBox":
|
||||
if name in T_INFOS or name in T_BESCHR or name in T_HILFS:
|
||||
for p in tpl.params:
|
||||
raw[str(p.name).strip()] = str(p.value).strip()
|
||||
elif name == "Übungsbeschreibung":
|
||||
for p in tpl.params:
|
||||
raw[str(p.name).strip()] = str(p.value).strip()
|
||||
elif name == "SkillDevelopment":
|
||||
key = str(p.name).strip()
|
||||
val = str(p.value).strip()
|
||||
raw[key] = val
|
||||
elif name in T_SKILL:
|
||||
raw.setdefault("capabilities", [])
|
||||
def _getp(t, k):
|
||||
try:
|
||||
return str(t.get(k).value).strip()
|
||||
except Exception:
|
||||
return ""
|
||||
cap = _getp(tpl, "PrimaryCapability")
|
||||
lvl = _getp(tpl, "CapabilityLevel")
|
||||
try:
|
||||
cap = str(tpl.get("PrimaryCapability").value).strip()
|
||||
lvl_i = int(lvl)
|
||||
except Exception:
|
||||
cap = ""
|
||||
try:
|
||||
lvl = int(str(tpl.get("CapabilityLevel").value).strip())
|
||||
except Exception:
|
||||
lvl = 0
|
||||
lvl_i = 0
|
||||
if cap:
|
||||
raw["capabilities"].append({"capability": cap, "level": lvl})
|
||||
raw["wikitext"] = wikitext
|
||||
raw["capabilities"].append({"capability": cap, "level": lvl_i})
|
||||
|
||||
return raw
|
||||
|
||||
def build_payload(raw: Dict[str, Any], fullurl: str, category: str) -> Dict[str, Any]:
|
||||
# Exercise.capabilities erwartet Dict[str,int]
|
||||
caps_list = raw.get("capabilities", [])
|
||||
capabilities = {}
|
||||
for c in caps_list:
|
||||
cap = c.get("capability")
|
||||
lvl = c.get("level")
|
||||
# ----- Fingerprint (stabil, wie zuvor) -----
|
||||
|
||||
def _normalize(v: Any) -> str:
|
||||
if v is None:
|
||||
return ""
|
||||
if isinstance(v, (list, tuple)):
|
||||
return ",".join(_normalize(x) for x in v)
|
||||
if isinstance(v, dict):
|
||||
return json.dumps(v, sort_keys=True, ensure_ascii=False)
|
||||
return str(v).strip()
|
||||
|
||||
|
||||
def _norm_text(s: str) -> str:
|
||||
if s is None:
|
||||
return ""
|
||||
s = str(s).replace("\u00a0", " ")
|
||||
s = " ".join(s.split())
|
||||
return s.strip()
|
||||
|
||||
|
||||
def _canon_title(t: str) -> str:
|
||||
t = (t or "").strip().replace("_", " ")
|
||||
return t.replace("–", "-").replace("—", "-")
|
||||
|
||||
|
||||
def compute_fingerprint(payload: Dict[str, Any]) -> str:
|
||||
kws = payload.get("keywords") or []
|
||||
kws = [k.replace("\u2013", "-").replace("\u2014", "-") for k in kws]
|
||||
kws = sorted({(k or "").strip() for k in kws if (k or "").strip()}, key=str.casefold)
|
||||
dur = payload.get("duration_minutes") or 0
|
||||
try:
|
||||
dur = int(round(float(dur)))
|
||||
except Exception:
|
||||
dur = 0
|
||||
fields = [
|
||||
_canon_title(payload.get("title", "")),
|
||||
_norm_text(payload.get("summary", "")),
|
||||
_norm_text(payload.get("execution", "")),
|
||||
_norm_text(payload.get("notes", "")),
|
||||
dur,
|
||||
payload.get("capabilities", {}),
|
||||
kws,
|
||||
]
|
||||
base = "|".join(_normalize(f) for f in fields)
|
||||
return hashlib.sha256(base.encode("utf-8")).hexdigest()
|
||||
|
||||
# ----- Payload (exakte DE-Keys zuerst, dann schmale Fallbacks) -----
|
||||
|
||||
EXACT_KEYS = {
|
||||
"summary": ["Summary", "Kurzbeschreibung"],
|
||||
"execution": ["Durchführung", "Durchfuehrung", "Ablauf"],
|
||||
"duration": ["Dauer", "Zeit"],
|
||||
"keywords": ["Schlüsselworte", "Schlüsselwörter", "Schluesselworte", "Schluesselwoerter", "Keywords", "Tags"],
|
||||
"equipment_prim": ["Hilfsmittel"],
|
||||
"equipment_alt": ["Geräte", "Geraete", "Gerät", "Geraet", "Material"],
|
||||
"discipline": ["Übungstyp", "Uebungstyp", "Disziplin"],
|
||||
"group": ["Gruppengröße", "Gruppengroesse", "Group"],
|
||||
"age_group": ["Altersgruppe"],
|
||||
"target_group": ["Zielgruppe"],
|
||||
"purpose": ["Ziel", "Zweck"],
|
||||
"notes": ["Hinweise", "Notes"],
|
||||
"preparation": ["Vorbereitung", "RefMethode"],
|
||||
"method": ["Methode", "Method"],
|
||||
}
|
||||
|
||||
|
||||
def _first_any(raw: Dict[str, Any], keys: List[str]) -> Optional[str]:
|
||||
for k in keys:
|
||||
v = raw.get(k)
|
||||
if isinstance(v, str) and v.strip():
|
||||
return v.strip()
|
||||
return None
|
||||
|
||||
|
||||
def build_payload(raw: Dict[str, Any], fullurl: str, category: str, *, mutate: bool = False) -> Dict[str, Any]:
|
||||
# Capabilities -> Dict[str,int]
|
||||
capabilities: Dict[str, int] = {}
|
||||
for c in raw.get("capabilities", []) or []:
|
||||
cap = c.get("capability"); lvl = c.get("level")
|
||||
if isinstance(cap, str) and cap:
|
||||
try:
|
||||
capabilities[cap] = int(lvl)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Defaults/Fallbacks
|
||||
duration = 0.0
|
||||
# Exakte Schlüssel zuerst
|
||||
summary = _first_any(raw, EXACT_KEYS["summary"]) or ""
|
||||
execution = _first_any(raw, EXACT_KEYS["execution"]) or ""
|
||||
duration = _first_any(raw, EXACT_KEYS["duration"]) or "0"
|
||||
|
||||
kw_raw = _first_any(raw, EXACT_KEYS["keywords"]) or ""
|
||||
if kw_raw:
|
||||
parts = [p.strip() for p in kw_raw.replace("\n", ",").split(",")]
|
||||
keywords = [p for p in parts if p]
|
||||
else:
|
||||
keywords = []
|
||||
|
||||
eq_raw = _first_any(raw, EXACT_KEYS["equipment_prim"]) or _first_any(raw, EXACT_KEYS["equipment_alt"]) or ""
|
||||
if eq_raw:
|
||||
equipment = [e.strip() for e in eq_raw.replace("\n", ",").split(",") if e.strip()]
|
||||
else:
|
||||
equipment = []
|
||||
|
||||
notes = _first_any(raw, EXACT_KEYS["notes"]) or ""
|
||||
discipline = _first_any(raw, EXACT_KEYS["discipline"]) or ""
|
||||
group = _first_any(raw, EXACT_KEYS["group"]) or None
|
||||
age_group = _first_any(raw, EXACT_KEYS["age_group"]) or ""
|
||||
target_group = _first_any(raw, EXACT_KEYS["target_group"]) or ""
|
||||
purpose = _first_any(raw, EXACT_KEYS["purpose"]) or ""
|
||||
preparation = _first_any(raw, EXACT_KEYS["preparation"]) or ""
|
||||
method = _first_any(raw, EXACT_KEYS["method"]) or ""
|
||||
|
||||
try:
|
||||
duration = float(raw.get("Dauer", 0) or 0)
|
||||
duration_f = float(duration or 0)
|
||||
except Exception:
|
||||
duration = 0.0
|
||||
|
||||
keywords = []
|
||||
kw_raw = raw.get("Schlüsselworte", "")
|
||||
if isinstance(kw_raw, str):
|
||||
keywords = [k.strip() for k in kw_raw.split(",") if k.strip()]
|
||||
|
||||
equipment = []
|
||||
eq_raw = raw.get("equipment", [])
|
||||
if isinstance(eq_raw, str):
|
||||
equipment = [e.strip() for e in eq_raw.split(",") if e.strip()]
|
||||
elif isinstance(eq_raw, list):
|
||||
equipment = [str(e).strip() for e in eq_raw if str(e).strip()]
|
||||
duration_f = 0.0
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"title": raw.get("title") or "",
|
||||
"summary": raw.get("Summary", "") or "",
|
||||
"short_description": raw.get("Summary", "") or "",
|
||||
"summary": summary,
|
||||
"short_description": summary,
|
||||
"keywords": keywords,
|
||||
"link": fullurl or "",
|
||||
"discipline": raw.get("Übungstyp", "") or "",
|
||||
"group": raw.get("Gruppengröße", "") or None,
|
||||
"age_group": raw.get("Altersgruppe", "") or "",
|
||||
"target_group": raw.get("Zielgruppe", "") or "",
|
||||
"discipline": discipline,
|
||||
"group": group,
|
||||
"age_group": age_group,
|
||||
"target_group": target_group,
|
||||
"min_participants": 1,
|
||||
"duration_minutes": int(round(duration)), # Exercise erwartet int
|
||||
"duration_minutes": int(round(duration_f)),
|
||||
"capabilities": capabilities,
|
||||
"category": category or "",
|
||||
"purpose": raw.get("Ziel", "") or "",
|
||||
"execution": raw.get("Durchführung", "") or "",
|
||||
"notes": raw.get("Hinweise", "") or "",
|
||||
"preparation": raw.get("RefMethode", "") or "",
|
||||
"method": raw.get("method", "") or "", # falls im Wikitext vorhanden
|
||||
"purpose": purpose,
|
||||
"execution": execution,
|
||||
"notes": (notes + (" [auto-update]" if mutate else "")).strip(),
|
||||
"preparation": preparation,
|
||||
"method": method,
|
||||
"equipment": equipment,
|
||||
"fullurl": fullurl or "", # optionales Feld
|
||||
# Idempotenz (optional nutzbar in exercise_router):
|
||||
"external_id": f"wiki:{raw.get('pageid')}",
|
||||
"source": "MediaWiki"
|
||||
"fullurl": fullurl or "",
|
||||
"external_id": f"mw:{raw.get('pageid')}",
|
||||
"source": "MediaWiki",
|
||||
}
|
||||
payload["fingerprint"] = compute_fingerprint(payload)
|
||||
return payload
|
||||
|
||||
def ingest_exercise(payload: Dict[str, Any]) -> None:
|
||||
# ----- Lookup/Upsert -----
|
||||
|
||||
def lookup_by_external_id(external_id: str) -> Tuple[Optional[Dict[str, Any]], Optional[int]]:
|
||||
url = f"{EXERCISE_API}/by-external-id"
|
||||
try:
|
||||
r = requests.get(url, params={"external_id": external_id}, timeout=REQUEST_TIMEOUT)
|
||||
if r.status_code == 404:
|
||||
return None, 404
|
||||
r.raise_for_status()
|
||||
return r.json(), r.status_code
|
||||
except requests.HTTPError as e:
|
||||
return {"error": str(e), "status_code": getattr(e.response, "status_code", None)}, getattr(e.response, "status_code", None)
|
||||
except Exception as e:
|
||||
return {"error": str(e)}, None
|
||||
|
||||
|
||||
def _payload_subset_for_fp(p: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return {
|
||||
"title": p.get("title"),
|
||||
"summary": p.get("summary"),
|
||||
"execution": p.get("execution"),
|
||||
"notes": p.get("notes"),
|
||||
"duration_minutes": p.get("duration_minutes"),
|
||||
"capabilities": p.get("capabilities") or {},
|
||||
"keywords": p.get("keywords") or [],
|
||||
}
|
||||
|
||||
|
||||
def _print_diff(before: Dict[str, Any], after: Dict[str, Any]) -> None:
|
||||
keys = ["title","summary","execution","notes","duration_minutes","capabilities","keywords"]
|
||||
b = {k: before.get(k) for k in keys}
|
||||
a = {k: after.get(k) for k in keys}
|
||||
def _kws(x):
|
||||
return sorted({(k or "").strip() for k in (x or [])}, key=str.casefold)
|
||||
b_norm = {
|
||||
"title": _canon_title(b.get("title")),
|
||||
"summary": _norm_text(b.get("summary")),
|
||||
"execution": _norm_text(b.get("execution")),
|
||||
"notes": _norm_text(b.get("notes")),
|
||||
"duration_minutes": b.get("duration_minutes"),
|
||||
"capabilities": b.get("capabilities"),
|
||||
"keywords": _kws(b.get("keywords")),
|
||||
}
|
||||
a_norm = {
|
||||
"title": _canon_title(a.get("title")),
|
||||
"summary": _norm_text(a.get("summary")),
|
||||
"execution": _norm_text(a.get("execution")),
|
||||
"notes": _norm_text(a.get("notes")),
|
||||
"duration_minutes": a.get("duration_minutes"),
|
||||
"capabilities": a.get("capabilities"),
|
||||
"keywords": _kws(a.get("keywords")),
|
||||
}
|
||||
diff = {k: (b_norm[k], a_norm[k]) for k in keys if b_norm.get(k) != a_norm.get(k)}
|
||||
if diff:
|
||||
print("[Diff] changes:", json.dumps(diff, ensure_ascii=False))
|
||||
else:
|
||||
print("[Diff] (none in hash fields)")
|
||||
|
||||
|
||||
def _now_iso() -> str:
|
||||
return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
|
||||
|
||||
|
||||
def upsert_exercise(payload: Dict[str, Any], *, dry_run: bool = False) -> str:
|
||||
title = payload.get("title", "<ohne Titel>")
|
||||
resp = requests.post(EXERCISE_API, json=payload, timeout=60)
|
||||
if resp.status_code == 422:
|
||||
print(f"[Ingest] '{title}' -> FAILED 422:\n{resp.text}")
|
||||
ext_id = payload.get("external_id")
|
||||
fp_new = payload.get("fingerprint")
|
||||
|
||||
found, status = lookup_by_external_id(ext_id)
|
||||
|
||||
action = "create"; reason = "not found (lookup 404)"; found_payload = {}
|
||||
|
||||
if not (status == 404 or found is None):
|
||||
if isinstance(found, dict):
|
||||
found_payload = found.get("payload", found)
|
||||
fp_old_stored = found.get("fingerprint") or found_payload.get("fingerprint")
|
||||
fp_old_recalc = compute_fingerprint(_payload_subset_for_fp(found_payload))
|
||||
if fp_new == fp_old_stored or fp_new == fp_old_recalc:
|
||||
action, reason = "skip", "fingerprint unchanged"
|
||||
else:
|
||||
action, reason = "update", "fingerprint changed"
|
||||
else:
|
||||
action, reason = "create", "unexpected lookup type"
|
||||
|
||||
if dry_run:
|
||||
print(f"[DryRun] {action.upper():6} '{title}' ({ext_id}) – {reason}")
|
||||
if action == "update":
|
||||
_print_diff(found_payload, payload)
|
||||
return action
|
||||
|
||||
if action == "create":
|
||||
body = dict(payload); body["imported_at"] = _now_iso()
|
||||
resp = requests.post(EXERCISE_API, json=body, timeout=REQUEST_TIMEOUT)
|
||||
if resp.status_code == 422:
|
||||
print(f"[Create] '{title}' -> FAILED 422:\n{resp.text}")
|
||||
try: resp.raise_for_status()
|
||||
except Exception: pass
|
||||
else:
|
||||
resp.raise_for_status(); print(f"[Create] '{title}' – {reason} -> OK")
|
||||
elif action == "update":
|
||||
body = dict(payload); body["imported_at"] = _now_iso()
|
||||
resp = requests.post(EXERCISE_API, json=body, timeout=REQUEST_TIMEOUT)
|
||||
if resp.status_code == 422:
|
||||
print(f"[Update] '{title}' -> FAILED 422:\n{resp.text}")
|
||||
try: resp.raise_for_status()
|
||||
except Exception: pass
|
||||
else:
|
||||
resp.raise_for_status(); print(f"[Update] '{title}' – {reason} -> OK"); _print_diff(found_payload, payload)
|
||||
else:
|
||||
print(f"[Skip] '{title}' – {reason}")
|
||||
return action
|
||||
|
||||
# ----- Orchestrierung -----
|
||||
|
||||
def process_one(title: str, category: str, *, mutate: bool = False, dry_run: bool = False, debug_raw: bool = False) -> str:
|
||||
info = fetch_page_info(title)
|
||||
pid = info.get("pageid"); fullurl = info.get("fullurl") or ""
|
||||
if not pid:
|
||||
print(f"[Error] pageid für '{title}' nicht gefunden.", file=sys.stderr); return "failed"
|
||||
raw = parse_exercise(title, pid)
|
||||
if debug_raw:
|
||||
print("[Debug] Raw-Keys:", sorted([k for k in raw.keys() if k not in {"wikitext"}]))
|
||||
payload = build_payload(raw, fullurl, category, mutate=mutate)
|
||||
return upsert_exercise(payload, dry_run=dry_run)
|
||||
|
||||
|
||||
def process_all(category: str, *, dry_run: bool = False, debug_raw: bool = False) -> Dict[str, int]:
|
||||
stats = {"created": 0, "updated": 0, "skipped": 0, "failed": 0}
|
||||
print(f"[Main] Lade Liste der Übungen aus Kategorie '{category}'…")
|
||||
pages = fetch_all_pages(category)
|
||||
print(f"[Main] {len(pages)} Seiten gefunden.")
|
||||
|
||||
for idx, (title, entry) in enumerate(pages.items(), 1):
|
||||
try:
|
||||
resp.raise_for_status()
|
||||
except Exception:
|
||||
pass
|
||||
return
|
||||
resp.raise_for_status()
|
||||
print(f"[Ingest] '{title}' -> OK")
|
||||
getter = getattr(entry, "get", None)
|
||||
pid = getter("pageid") if callable(getter) else None
|
||||
fullurl = getter("fullurl") if callable(getter) else None
|
||||
if not pid:
|
||||
info = fetch_page_info(title); pid = info.get("pageid"); fullurl = fullurl or info.get("fullurl")
|
||||
if not pid:
|
||||
print(f"[Skip] '{title}' hat keine pageid"); stats["failed"] += 1; continue
|
||||
raw = parse_exercise(title, pid)
|
||||
if debug_raw and idx <= 5:
|
||||
print(f"[Debug] #{idx} '{title}' Raw-Keys:", sorted([k for k in raw.keys() if k not in {"wikitext"}]))
|
||||
payload = build_payload(raw, fullurl or "", category)
|
||||
act = upsert_exercise(payload, dry_run=dry_run)
|
||||
stats["created" if act=="create" else "updated" if act=="update" else "skipped"] += 1
|
||||
except requests.HTTPError as e:
|
||||
code = getattr(e, "response", None).status_code if getattr(e, "response", None) else None
|
||||
if code == 404:
|
||||
print(f"[Skip] '{title}': page not found (404)"); stats["failed"] += 1
|
||||
else:
|
||||
print(f"[Error] '{title}': {e}"); stats["failed"] += 1
|
||||
except Exception as e:
|
||||
print(f"[Error] '{title}': {e}"); stats["failed"] += 1
|
||||
return stats
|
||||
|
||||
|
||||
def run_smoke_test(title: str, category: str, *, debug_raw: bool = False) -> None:
|
||||
print("\n[SmokeTest] Lauf 1/3: CREATE (Erstimport)"); act1 = process_one(title, category, mutate=False, debug_raw=debug_raw); print("[SmokeTest] Aktion:", act1)
|
||||
print("\n[SmokeTest] Lauf 2/3: SKIP (Wiederholung, unverändert)"); act2 = process_one(title, category, mutate=False, debug_raw=debug_raw); print("[SmokeTest] Aktion:", act2)
|
||||
print("\n[SmokeTest] Lauf 3/3: UPDATE (simulierte Wiki-Änderung an 'notes')"); act3 = process_one(title, category, mutate=True, debug_raw=debug_raw); print("[SmokeTest] Aktion:", act3)
|
||||
print("\n[SmokeTest] Zusammenfassung:"); print(json.dumps({"run1": act1, "run2": act2, "run3": act3}, ensure_ascii=False, indent=2))
|
||||
|
||||
# ----- Main -----
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(description="Import exercises from Wiki to Qdrant (via FastAPI wiki_router)")
|
||||
parser.add_argument("--all", action="store_true", help="Alle Übungen importieren (SMW-Ask)")
|
||||
|
|
@ -186,13 +439,13 @@ def main() -> None:
|
|||
parser.add_argument("--username", type=str, default=os.getenv("WIKI_BOT_USER"), help="Wiki-Login Benutzer (überschreibt .env)")
|
||||
parser.add_argument("--password", type=str, default=os.getenv("WIKI_BOT_PASSWORD"), help="Wiki-Login Passwort (überschreibt .env)")
|
||||
parser.add_argument("--skip-login", action="store_true", help="Login-Schritt überspringen (falls Session schon aktiv)")
|
||||
|
||||
parser.add_argument("--dry-run", action="store_true", help="Kein Schreiben; nur Entscheidungen (create/update/skip) + Gründe loggen")
|
||||
parser.add_argument("--smoke-test", action="store_true", help="3 Durchläufe (create→skip→update) für --title")
|
||||
parser.add_argument("--debug-raw", action="store_true", help="Zeigt die aus dem Wiki gelesenen Roh-Keys je Seite")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Sanity
|
||||
wiki_health()
|
||||
|
||||
# Login (sofern nicht explizit übersprungen)
|
||||
if not args.skip_login:
|
||||
if not args.username or not args.password:
|
||||
print("[Login] Fehler: fehlende Credentials. Setze .env (WIKI_BOT_USER/WIKI_BOT_PASSWORD) oder CLI --username/--password.", file=sys.stderr)
|
||||
|
|
@ -200,39 +453,18 @@ def main() -> None:
|
|||
try:
|
||||
wiki_login(args.username, args.password)
|
||||
except Exception as e:
|
||||
print(str(e), file=sys.stderr)
|
||||
sys.exit(1)
|
||||
print(str(e), file=sys.stderr); sys.exit(1)
|
||||
|
||||
if args.smoke_test:
|
||||
run_smoke_test(args.title, args.category, debug_raw=args.debug_raw); return
|
||||
|
||||
# Einzel- oder Vollimport
|
||||
if args.all:
|
||||
print(f"[Main] Lade Liste der Übungen aus Kategorie '{args.category}'…")
|
||||
pages = fetch_all_pages(args.category)
|
||||
print(f"[Main] {len(pages)} Seiten gefunden.")
|
||||
for title, entry in pages.items():
|
||||
pid = entry.get("pageid")
|
||||
fullurl = entry.get("fullurl")
|
||||
if not pid:
|
||||
# Core-Info nachschlagen
|
||||
info = fetch_page_info(title)
|
||||
pid = info.get("pageid")
|
||||
fullurl = fullurl or info.get("fullurl")
|
||||
if not pid:
|
||||
print(f"[Skip] '{title}' hat keine pageid")
|
||||
continue
|
||||
raw = parse_exercise(title, pid)
|
||||
payload = build_payload(raw, fullurl or "", args.category)
|
||||
ingest_exercise(payload)
|
||||
stats = process_all(args.category, dry_run=args.dry_run, debug_raw=args.debug_raw)
|
||||
print("\n[Stats] created={created} updated={updated} skipped={skipped} failed={failed}".format(**stats))
|
||||
else:
|
||||
print(f"[Main] Import single exercise: {args.title}")
|
||||
info = fetch_page_info(args.title)
|
||||
pid = info.get("pageid")
|
||||
fullurl = info.get("fullurl") or ""
|
||||
if not pid:
|
||||
print(f"[Error] pageid für '{args.title}' nicht gefunden.", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
raw = parse_exercise(args.title, pid)
|
||||
payload = build_payload(raw, fullurl, args.category)
|
||||
ingest_exercise(payload)
|
||||
result = process_one(args.title, args.category, mutate=False, dry_run=args.dry_run, debug_raw=args.debug_raw)
|
||||
print(f"[Result] {result}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
93
tests/test_integrity_wp15.py
Normal file
93
tests/test_integrity_wp15.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Integritätstests für Referenzen (Templates, Exercises, Plans)."""
|
||||
import os, requests, uuid
|
||||
from datetime import datetime, timezone
|
||||
|
||||
BASE = os.getenv("BASE_URL", "http://127.0.0.1:8000").rstrip("/")
|
||||
|
||||
|
||||
def _make_exercise(ext_id: str):
|
||||
payload = {
|
||||
"external_id": ext_id,
|
||||
"fingerprint": "it-sha",
|
||||
"source": "IntegrityTest",
|
||||
"title": "Übung Dummy",
|
||||
"summary": "",
|
||||
"short_description": "",
|
||||
"keywords": ["Reaktion"],
|
||||
"discipline": "Karate",
|
||||
"age_group": "Teenager",
|
||||
"target_group": "Breitensport",
|
||||
"min_participants": 1,
|
||||
"duration_minutes": 5,
|
||||
"capabilities": {"Reaktionsfähigkeit": 1},
|
||||
"category": "Übungen",
|
||||
"purpose": "",
|
||||
"execution": "",
|
||||
"notes": "",
|
||||
"preparation": "",
|
||||
"method": "",
|
||||
"equipment": []
|
||||
}
|
||||
r = requests.post(f"{BASE}/exercise", json=payload)
|
||||
assert r.status_code == 200, r.text
|
||||
|
||||
|
||||
def test_plan_requires_existing_template():
|
||||
# Übung anlegen für gültigen Plan (damit Exercise-Check nicht stört)
|
||||
exid = f"it:{uuid.uuid4()}"; _make_exercise(exid)
|
||||
|
||||
# Plan mit nicht existenter template_id
|
||||
plan = {
|
||||
"template_id": "does-not-exist",
|
||||
"title": "Plan mit falschem Template",
|
||||
"discipline": "Karate",
|
||||
"age_group": "Teenager",
|
||||
"target_group": "Breitensport",
|
||||
"total_minutes": 30,
|
||||
"sections": [{"name": "Block", "minutes": 30, "items": [{"exercise_external_id": exid, "duration": 10, "why": ""}]}],
|
||||
"goals": [],
|
||||
"capability_summary": {},
|
||||
"created_by": "tester",
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
"source": "test"
|
||||
}
|
||||
r = requests.post(f"{BASE}/plan", json=plan)
|
||||
assert r.status_code == 422
|
||||
|
||||
|
||||
def test_plan_session_requires_existing_plan():
|
||||
sess = {
|
||||
"plan_id": "does-not-exist",
|
||||
"executed_at": datetime.now(timezone.utc).isoformat(),
|
||||
"location": "Dojo",
|
||||
"coach": "X",
|
||||
"group_label": "Y",
|
||||
"feedback": {"rating": 3, "notes": ""},
|
||||
"used_equipment": []
|
||||
}
|
||||
r = requests.post(f"{BASE}/plan_sessions", json=sess)
|
||||
assert r.status_code == 422
|
||||
|
||||
|
||||
def test_strict_exercises_if_env():
|
||||
# Nur sinnvoll, wenn Server mit PLAN_STRICT_EXERCISES=1 gestartet wurde
|
||||
if os.getenv("PLAN_STRICT_EXERCISES") not in {"1", "true", "yes", "on"}:
|
||||
import pytest; pytest.skip("Strict-Mode nicht aktiv – Test übersprungen")
|
||||
|
||||
plan = {
|
||||
"title": "Plan mit unbekannter Übung",
|
||||
"discipline": "Karate",
|
||||
"age_group": "Teenager",
|
||||
"target_group": "Breitensport",
|
||||
"total_minutes": 10,
|
||||
"sections": [{"name": "Block", "minutes": 10, "items": [{"exercise_external_id": "unknown:xyz", "duration": 5, "why": ""}]}],
|
||||
"goals": [],
|
||||
"capability_summary": {},
|
||||
"created_by": "tester",
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
"source": "test"
|
||||
}
|
||||
r = requests.post(f"{BASE}/plan", json=plan)
|
||||
assert r.status_code == 422
|
||||
73
tests/test_plan_lists_wp15.py
Normal file
73
tests/test_plan_lists_wp15.py
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Listen-/Filter-Tests für Templates & Pläne (v0.1.0)."""
|
||||
import os
|
||||
import requests
|
||||
from datetime import datetime, timezone, timedelta
|
||||
|
||||
BASE = os.getenv("BASE_URL", "http://127.0.0.1:8000").rstrip("/")
|
||||
|
||||
|
||||
def test_list_plan_templates_filters():
|
||||
# Zwei Templates anlegen (unterschiedliche Sections/Goals)
|
||||
tpl1 = {
|
||||
"name": "ListTpl A", "discipline": "Karate", "age_group": "Teenager", "target_group": "Breitensport", "total_minutes": 60,
|
||||
"sections": [{"name": "Warmup", "target_minutes": 10, "must_keywords": ["Reaktion"], "forbid_keywords": [], "ideal_keywords": ["Koordination"], "supplement_keywords": [], "capability_targets": {}}],
|
||||
"goals": ["Technik"], "equipment_allowed": [], "created_by": "tester", "version": "1.0"
|
||||
}
|
||||
tpl2 = {
|
||||
"name": "ListTpl B", "discipline": "Karate", "age_group": "Erwachsene", "target_group": "Breitensport", "total_minutes": 60,
|
||||
"sections": [{"name": "Technikblock", "target_minutes": 30, "must_keywords": ["Mae-Geri"], "forbid_keywords": [], "ideal_keywords": ["Timing"], "supplement_keywords": ["Partnerarbeit"], "capability_targets": {}}],
|
||||
"goals": ["Kondition"], "equipment_allowed": [], "created_by": "tester", "version": "1.0"
|
||||
}
|
||||
r1 = requests.post(f"{BASE}/plan_templates", json=tpl1); assert r1.status_code == 200
|
||||
r2 = requests.post(f"{BASE}/plan_templates", json=tpl2); assert r2.status_code == 200
|
||||
|
||||
# Filter: discipline=Karate & section=Warmup → sollte tpl1 enthalten
|
||||
r = requests.get(f"{BASE}/plan_templates", params={"discipline": "Karate", "section": "Warmup", "limit": 10, "offset": 0})
|
||||
assert r.status_code == 200, r.text
|
||||
js = r.json(); assert js["count"] >= 1
|
||||
assert any(tpl["name"] == "ListTpl A" for tpl in js["items"])
|
||||
|
||||
# Filter: keyword=Timing → sollte tpl2 treffen (ideal_keywords)
|
||||
r = requests.get(f"{BASE}/plan_templates", params={"keyword": "Timing"})
|
||||
assert r.status_code == 200
|
||||
js = r.json(); names = [t["name"] for t in js["items"]]
|
||||
assert "ListTpl B" in names
|
||||
|
||||
|
||||
def test_list_plans_filters_and_window():
|
||||
# Plan A (Teenager), Plan B (Erwachsene)
|
||||
base_tpl = {
|
||||
"name": "ListTpl PlanBase", "discipline": "Karate", "age_group": "Teenager", "target_group": "Breitensport", "total_minutes": 45,
|
||||
"sections": [{"name": "Warmup", "target_minutes": 10, "must_keywords": [], "forbid_keywords": [], "capability_targets": {}}],
|
||||
"goals": ["Technik"], "equipment_allowed": [], "created_by": "tester", "version": "1.0"
|
||||
}
|
||||
rt = requests.post(f"{BASE}/plan_templates", json=base_tpl); assert rt.status_code == 200
|
||||
tpl_id = rt.json()["id"]
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
plan_a = {
|
||||
"template_id": tpl_id, "title": "List Plan A", "discipline": "Karate", "age_group": "Teenager", "target_group": "Breitensport", "total_minutes": 45,
|
||||
"sections": [{"name": "Warmup", "minutes": 10, "items": []}], "goals": ["Technik"], "capability_summary": {}, "created_by": "tester",
|
||||
"created_at": now.isoformat(), "source": "test"
|
||||
}
|
||||
plan_b = {
|
||||
"template_id": tpl_id, "title": "List Plan B", "discipline": "Karate", "age_group": "Erwachsene", "target_group": "Breitensport", "total_minutes": 45,
|
||||
"sections": [{"name": "Technikblock", "minutes": 30, "items": []}], "goals": ["Kondition"], "capability_summary": {}, "created_by": "tester",
|
||||
"created_at": (now + timedelta(seconds=1)).isoformat(), "source": "test"
|
||||
}
|
||||
ra = requests.post(f"{BASE}/plan", json=plan_a); assert ra.status_code == 200
|
||||
rb = requests.post(f"{BASE}/plan", json=plan_b); assert rb.status_code == 200
|
||||
|
||||
# Filter: age_group=Teenager & section=Warmup → sollte Plan A enthalten
|
||||
r = requests.get(f"{BASE}/plans", params={"age_group": "Teenager", "section": "Warmup"})
|
||||
assert r.status_code == 200
|
||||
js = r.json(); assert js["count"] >= 1
|
||||
assert any(p["title"] == "List Plan A" for p in js["items"])
|
||||
|
||||
# Zeitfenster: created_from nach Plan A → sollte Plan B enthalten
|
||||
r = requests.get(f"{BASE}/plans", params={"created_from": (now + timedelta(milliseconds=500)).isoformat()})
|
||||
assert r.status_code == 200
|
||||
js = r.json(); titles = [p["title"] for p in js["items"]]
|
||||
assert "List Plan B" in titles
|
||||
121
tests/test_plan_sessions_wp15.py
Normal file
121
tests/test_plan_sessions_wp15.py
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Integration/E2E-Tests für plan_sessions (v0.1.0)."""
|
||||
import os
|
||||
import requests
|
||||
from datetime import datetime, timezone
|
||||
|
||||
BASE = os.getenv("BASE_URL", "http://127.0.0.1:8000").rstrip("/")
|
||||
|
||||
|
||||
def test_plan_session_create_read():
|
||||
# 1) Template minimal
|
||||
tpl = {
|
||||
"name": "Std 60 for sessions",
|
||||
"discipline": "Karate",
|
||||
"age_group": "Erwachsene",
|
||||
"target_group": "Breitensport",
|
||||
"total_minutes": 60,
|
||||
"sections": [
|
||||
{"name": "Warmup", "target_minutes": 10, "must_keywords": [], "forbid_keywords": [], "capability_targets": {}}
|
||||
],
|
||||
"goals": [],
|
||||
"equipment_allowed": [],
|
||||
"created_by": "tester",
|
||||
"version": "1.0"
|
||||
}
|
||||
r1 = requests.post(f"{BASE}/plan_templates", json=tpl)
|
||||
assert r1.status_code == 200, r1.text
|
||||
|
||||
# 2) Plan minimal
|
||||
plan = {
|
||||
"template_id": r1.json()["id"],
|
||||
"title": "Session‑Plan",
|
||||
"discipline": "Karate",
|
||||
"age_group": "Erwachsene",
|
||||
"target_group": "Breitensport",
|
||||
"total_minutes": 60,
|
||||
"sections": [
|
||||
{"name": "Warmup", "minutes": 10, "items": []}
|
||||
],
|
||||
"goals": [],
|
||||
"capability_summary": {},
|
||||
"created_by": "tester",
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
"source": "test"
|
||||
}
|
||||
r2 = requests.post(f"{BASE}/plan", json=plan)
|
||||
assert r2.status_code == 200, r2.text
|
||||
plan_id = r2.json()["id"]
|
||||
|
||||
# 3) Session anlegen
|
||||
session = {
|
||||
"plan_id": plan_id,
|
||||
"executed_at": datetime.now(timezone.utc).isoformat(),
|
||||
"location": "Dojo A",
|
||||
"coach": "Sensei K.",
|
||||
"group_label": "Montag 18:00",
|
||||
"feedback": {"rating": 4, "notes": "Gute Energie, Warmup etwas zu kurz."},
|
||||
"used_equipment": [" Pratzen ", "Bälle", "pratzen"]
|
||||
}
|
||||
r3 = requests.post(f"{BASE}/plan_sessions", json=session)
|
||||
assert r3.status_code == 200, r3.text
|
||||
sess_id = r3.json()["id"]
|
||||
|
||||
# 4) Session lesen & Normalisierung prüfen
|
||||
r4 = requests.get(f"{BASE}/plan_sessions/{sess_id}")
|
||||
assert r4.status_code == 200
|
||||
js = r4.json()
|
||||
# used_equipment dedupliziert/trimmed/casefolded → len == 2
|
||||
assert len(js["used_equipment"]) == 2
|
||||
assert "Pratzen" in js["used_equipment"] or "pratzen" in [x.lower() for x in js["used_equipment"]]
|
||||
|
||||
|
||||
def test_plan_session_invalid_rating():
|
||||
# Minimaler Plan für Referenz
|
||||
tpl = {
|
||||
"name": "Std 45 for sessions",
|
||||
"discipline": "Karate",
|
||||
"age_group": "Erwachsene",
|
||||
"target_group": "Breitensport",
|
||||
"total_minutes": 45,
|
||||
"sections": [
|
||||
{"name": "Warmup", "target_minutes": 10, "must_keywords": [], "forbid_keywords": [], "capability_targets": {}}
|
||||
],
|
||||
"goals": [],
|
||||
"equipment_allowed": [],
|
||||
"created_by": "tester",
|
||||
"version": "1.0"
|
||||
}
|
||||
r1 = requests.post(f"{BASE}/plan_templates", json=tpl)
|
||||
assert r1.status_code == 200
|
||||
plan = {
|
||||
"template_id": r1.json()["id"],
|
||||
"title": "Fehlerfall",
|
||||
"discipline": "Karate",
|
||||
"age_group": "Erwachsene",
|
||||
"target_group": "Breitensport",
|
||||
"total_minutes": 45,
|
||||
"sections": [
|
||||
{"name": "Warmup", "minutes": 10, "items": []}
|
||||
],
|
||||
"goals": [],
|
||||
"capability_summary": {},
|
||||
"created_by": "tester",
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
"source": "test"
|
||||
}
|
||||
r2 = requests.post(f"{BASE}/plan", json=plan)
|
||||
assert r2.status_code == 200
|
||||
|
||||
bad_session = {
|
||||
"plan_id": r2.json()["id"],
|
||||
"executed_at": datetime.now(timezone.utc).isoformat(),
|
||||
"location": "Dojo B",
|
||||
"coach": "Assist.",
|
||||
"group_label": "Dienstag 19:00",
|
||||
"feedback": {"rating": 7, "notes": "invalid"}, # ungültig (1..5)
|
||||
"used_equipment": []
|
||||
}
|
||||
r_bad = requests.post(f"{BASE}/plan_sessions", json=bad_session)
|
||||
assert r_bad.status_code == 422
|
||||
Loading…
Reference in New Issue
Block a user