llm-api/exercise_router.py aktualisiert
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s

This commit is contained in:
Lars 2025-08-13 12:39:44 +02:00
parent 59e7e64af7
commit 6a4e97f4e4

View File

@ -1,15 +1,17 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
exercise_router.py v1.7.0 exercise_router.py v1.7.1 (Swagger angereichert)
Neu: Ergänzt:
- Endpoint **POST /exercise/search**: kombinierbare Filter (discipline, duration, equipment any/all, keywords any/all, - Aussagekräftige summary/description/response_description je Endpoint
capability_geN / capability_eqN + names) + optionaler Vektor-Query (query-Text). Ausgabe inkl. Score. - Beispiele (x-codeSamples) für curl-Aufrufe
- Facetten erweitert: neben capability_ge1..ge5 jetzt auch capability_eq1..eq5. - Pydantic-Felder mit description + json_schema_extra (Beispiele)
- Idempotenz-Fix & Payload-Scroll (aus v1.6.2) beibehalten. - Keine API-Signatur-/Pfadänderungen, keine Prefix-Änderungen
- API-Signaturen bestehender Routen unverändert.
Hinweis: Die eq/ge-Felder werden beim Upsert gesetzt; für Alt-Punkte einmal das Backfill laufen lassen. Hinweis:
- Endpunkte bleiben weiterhin unter /exercise/* (weil die Routenstrings bereits /exercise/... enthalten).
- Falls du später einen APIRouter-Prefix setzen willst, dann bitte die Pfade unten von '/exercise/...' auf relative Pfade ändern,
sonst entstehen Doppelpfade.
""" """
from fastapi import APIRouter, HTTPException, Query from fastapi import APIRouter, HTTPException, Query
@ -27,77 +29,137 @@ from qdrant_client.models import (
FieldCondition, FieldCondition,
MatchValue, MatchValue,
) )
import logging
import os import os
router = APIRouter() logger = logging.getLogger("exercise_router")
logger.setLevel(logging.INFO)
# Router ohne prefix (Pfadstrings enthalten bereits '/exercise/...')
router = APIRouter(tags=["exercise"])
# ========================= # =========================
# Models # Models
# ========================= # =========================
class Exercise(BaseModel): class Exercise(BaseModel):
id: str = Field(default_factory=lambda: str(uuid4())) id: str = Field(default_factory=lambda: str(uuid4()), description="Interne UUID (Qdrant-Punkt-ID)")
# Upsert-Metadaten # Upsert-Metadaten
external_id: Optional[str] = None external_id: Optional[str] = Field(default=None, description="Upsert-Schlüssel (z. B. 'mw:{pageid}')")
fingerprint: Optional[str] = None fingerprint: Optional[str] = Field(default=None, description="sha256 der Kernfelder für Idempotenz/Diff")
source: Optional[str] = None source: Optional[str] = Field(default=None, description="Quelle (z. B. 'mediawiki', 'pdf-import', …)")
imported_at: Optional[datetime] = None imported_at: Optional[datetime] = Field(default=None, description="Zeitpunkt des Imports (ISO-8601)")
# Domain-Felder # Domain-Felder
title: str title: str = Field(..., description="Übungstitel")
summary: str summary: str = Field(..., description="Kurzbeschreibung/Ziel der Übung")
short_description: str short_description: str = Field(..., description="Alternative Kurzform / Teaser")
keywords: List[str] = [] keywords: List[str] = Field(default_factory=list, description="Freie Schlagworte (normalisiert)")
link: Optional[str] = None link: Optional[str] = Field(default=None, description="Kanonische URL/Permalink zur Quelle")
discipline: str discipline: str = Field(..., description="Disziplin (z. B. Karate)")
group: Optional[str] = None group: Optional[str] = Field(default=None, description="Optionale Gruppierung/Kategorie")
age_group: str age_group: str = Field(..., description="Altersgruppe (z. B. Kinder/Schüler/Teenager/Erwachsene)")
target_group: str target_group: str = Field(..., description="Zielgruppe (z. B. Breitensportler)")
min_participants: int min_participants: int = Field(..., ge=0, description="Minimale Gruppenstärke")
duration_minutes: int duration_minutes: int = Field(..., ge=0, description="Dauer in Minuten")
capabilities: Dict[str, int] = {} capabilities: Dict[str, int] = Field(default_factory=dict, description="Fähigkeiten-Map: {Name: Level 1..5}")
category: str category: str = Field(..., description="Abschnitt / Kategorie (z. B. Aufwärmen, Grundschule, …)")
purpose: str purpose: str = Field(..., description="Zweck/Zielabsicht")
execution: str execution: str = Field(..., description="Durchführungsschritte (Markdown/Wiki-ähnlich)")
notes: str notes: str = Field(..., description="Hinweise/Coaching-Cues")
preparation: str preparation: str = Field(..., description="Vorbereitung/Material")
method: str method: str = Field(..., description="Methodik/Didaktik")
equipment: List[str] = [] equipment: List[str] = Field(default_factory=list, description="Benötigte Hilfsmittel")
model_config = {
"json_schema_extra": {
"example": {
"external_id": "mw:218",
"title": "Affenklatschen",
"summary": "Koordination & Aufmerksamkeit mit Ballwechseln",
"short_description": "Ballgewöhnung im Stand/Gehen/Laufen",
"keywords": ["Hand-Auge-Koordination", "Reaktion"],
"link": "https://www.karatetrainer.de/index.php?title=Affenklatschen",
"discipline": "Karate",
"age_group": "Teenager",
"target_group": "Breitensportler",
"min_participants": 4,
"duration_minutes": 8,
"capabilities": {"Reaktionsfähigkeit": 2, "Kopplungsfähigkeit": 2},
"category": "Aufwärmen",
"purpose": "Aufmerksamkeit & Reaktionskette aktivieren",
"execution": "* Paarweise aufstellen …",
"notes": "* nicht zu lange werden lassen",
"preparation": "* Bälle bereit halten",
"method": "* klare Regeln/Strafrunde",
"equipment": ["Bälle"]
}
}
}
class DeleteResponse(BaseModel): class DeleteResponse(BaseModel):
status: str status: str = Field(..., description="Statusmeldung")
count: int count: int = Field(..., ge=0, description="Anzahl betroffener Punkte")
collection: str collection: str = Field(..., description="Qdrant-Collection-Name")
class ExerciseSearchRequest(BaseModel): class ExerciseSearchRequest(BaseModel):
# Optionaler Semantik-Query (Vektor) # Optionaler Semantik-Query (Vektor)
query: Optional[str] = None query: Optional[str] = Field(default=None, description="Freitext für Vektor-Suche (optional)")
limit: int = Field(default=20, ge=1, le=200) limit: int = Field(default=20, ge=1, le=200, description="Max. Treffer")
offset: int = Field(default=0, ge=0) offset: int = Field(default=0, ge=0, description="Offset/Pagination")
# Einfache Filter # Einfache Filter
discipline: Optional[str] = None discipline: Optional[str] = Field(default=None, description="z. B. Karate")
target_group: Optional[str] = None target_group: Optional[str] = Field(default=None, description="z. B. Breitensportler")
age_group: Optional[str] = None age_group: Optional[str] = Field(default=None, description="z. B. Teenager")
max_duration: Optional[int] = Field(default=None, ge=0) max_duration: Optional[int] = Field(default=None, ge=0, description="Obergrenze Minuten")
# Listen-Filter # Listen-Filter
equipment_any: Optional[List[str]] = None # mindestens eins muss passen equipment_any: Optional[List[str]] = Field(default=None, description="Mind. eines muss passen")
equipment_all: Optional[List[str]] = None # alle müssen passen equipment_all: Optional[List[str]] = Field(default=None, description="Alle müssen passen")
keywords_any: Optional[List[str]] = None keywords_any: Optional[List[str]] = Field(default=None, description="Mind. eines muss passen")
keywords_all: Optional[List[str]] = None keywords_all: Optional[List[str]] = Field(default=None, description="Alle müssen passen")
# Capabilities (Namen + Level-Operator) # Capabilities (Namen + Level-Operator)
capability_names: Optional[List[str]] = None capability_names: Optional[List[str]] = Field(default=None, description="Capability-Bezeichnungen")
capability_ge_level: Optional[int] = Field(default=None, ge=1, le=5) capability_ge_level: Optional[int] = Field(default=None, ge=1, le=5, description="Level ≥ N")
capability_eq_level: Optional[int] = Field(default=None, ge=1, le=5) capability_eq_level: Optional[int] = Field(default=None, ge=1, le=5, description="Level == N")
model_config = {
"json_schema_extra": {
"examples": [{
"discipline": "Karate",
"max_duration": 12,
"equipment_any": ["Bälle"],
"capability_names": ["Reaktionsfähigkeit"],
"capability_ge_level": 2,
"limit": 5
}, {
"query": "Aufwärmen Reaktionsfähigkeit 10min Teenager Bälle",
"discipline": "Karate",
"limit": 3
}]
}
}
class ExerciseSearchHit(BaseModel): class ExerciseSearchHit(BaseModel):
id: str id: str = Field(..., description="Qdrant-Punkt-ID")
score: Optional[float] = None score: Optional[float] = Field(default=None, description="Ähnlichkeitsscore (nur bei Vektor-Suche)")
payload: Exercise payload: Exercise = Field(..., description="Übungsdaten (Payload)")
class ExerciseSearchResponse(BaseModel): class ExerciseSearchResponse(BaseModel):
hits: List[ExerciseSearchHit] hits: List[ExerciseSearchHit] = Field(..., description="Trefferliste")
model_config = {
"json_schema_extra": {
"example": {
"hits": [{
"id": "c1f1-…",
"score": 0.78,
"payload": Exercise.model_config["json_schema_extra"]["example"]
}]
}
}
}
# ========================= # =========================
# Helpers # Helpers
@ -160,6 +222,12 @@ def _norm_list(xs: List[Any]) -> List[str]:
def _facet_capabilities(caps: Dict[str, Any]) -> Dict[str, List[str]]: def _facet_capabilities(caps: Dict[str, Any]) -> Dict[str, List[str]]:
"""
Leitet Facettenfelder aus der capabilities-Map ab:
- capability_keys: alle Namen
- capability_geN: Namen mit Level >= N (1..5)
- capability_eqN: Namen mit Level == N (1..5)
"""
caps = caps or {} caps = caps or {}
def names_where(pred) -> List[str]: def names_where(pred) -> List[str]:
@ -194,6 +262,7 @@ def _facet_capabilities(caps: Dict[str, Any]) -> Dict[str, List[str]]:
def _response_strip_extras(payload: Dict[str, Any]) -> Dict[str, Any]: def _response_strip_extras(payload: Dict[str, Any]) -> Dict[str, Any]:
# Nur definierte Exercise-Felder zurückgeben (saubere API)
allowed = set(Exercise.model_fields.keys()) allowed = set(Exercise.model_fields.keys())
return {k: v for k, v in payload.items() if k in allowed} return {k: v for k, v in payload.items() if k in allowed}
@ -209,8 +278,7 @@ def _build_filter(req: ExerciseSearchRequest) -> Filter:
if req.age_group: if req.age_group:
must.append(FieldCondition(key="age_group", match=MatchValue(value=req.age_group))) must.append(FieldCondition(key="age_group", match=MatchValue(value=req.age_group)))
if req.max_duration is not None: if req.max_duration is not None:
# Range ohne Import zusätzlicher Modelle: Qdrant akzeptiert auch {'range': {'lte': n}} per JSON; # Range in Qdrant: über rohen JSON-Range-Ausdruck (Client-Modell hat keinen Komfort-Wrapper)
# über Client-Modell tun wir es hier nicht, da wir Filter primär für Keyword-Felder nutzen.
must.append({"key": "duration_minutes", "range": {"lte": int(req.max_duration)}}) must.append({"key": "duration_minutes", "range": {"lte": int(req.max_duration)}})
# equipment # equipment
@ -218,7 +286,6 @@ def _build_filter(req: ExerciseSearchRequest) -> Filter:
for it in req.equipment_all: for it in req.equipment_all:
must.append(FieldCondition(key="equipment", match=MatchValue(value=it))) must.append(FieldCondition(key="equipment", match=MatchValue(value=it)))
if req.equipment_any: if req.equipment_any:
# OR: über 'should' Liste
for it in req.equipment_any: for it in req.equipment_any:
should.append(FieldCondition(key="equipment", match=MatchValue(value=it))) should.append(FieldCondition(key="equipment", match=MatchValue(value=it)))
@ -248,22 +315,55 @@ def _build_filter(req: ExerciseSearchRequest) -> Filter:
flt = Filter(must=must) flt = Filter(must=must)
if should: if should:
# qdrant: 'should' mit implizitem minimum_should_match=1 # Qdrant: 'should' entspricht OR mit minimum_should_match=1
flt.should = should flt.should = should
return flt return flt
# ========================= # =========================
# Endpoints # Endpoints
# ========================= # =========================
@router.get("/exercise/by-external-id") @router.get(
def get_exercise_by_external_id(external_id: str = Query(..., min_length=3)): "/exercise/by-external-id",
summary="Übung per external_id abrufen",
description=(
"Liefert die Übung mit der gegebenen `external_id` (z. B. `mw:{pageid}`). "
"Verwendet einen Qdrant-Filter auf dem Payload-Feld `external_id`."
),
response_description="Vollständiger Exercise-Payload oder 404 bei Nichtfund.",
openapi_extra={
"x-codeSamples": [{
"lang": "bash",
"label": "curl",
"source": "curl -s 'http://localhost:8000/exercise/by-external-id?external_id=mw:218' | jq ."
}]
}
)
def get_exercise_by_external_id(external_id: str = Query(..., min_length=3, description="Upsert-Schlüssel, z. B. 'mw:218'")):
found = _lookup_by_external_id(external_id) found = _lookup_by_external_id(external_id)
if not found: if not found:
raise HTTPException(status_code=404, detail="not found") raise HTTPException(status_code=404, detail="not found")
return found return found
@router.post("/exercise", response_model=Exercise) @router.post(
"/exercise",
response_model=Exercise,
summary="Create/Update (idempotent per external_id)",
description=(
"Legt eine Übung an oder aktualisiert sie. Wenn `external_id` vorhanden und bereits in der Collection existiert, "
"wird **Update** auf dem bestehenden Punkt ausgeführt (Upsert). `keywords`/`equipment` werden normalisiert, "
"Capability-Facetten (`capability_ge1..5`, `capability_eq1..5`, `capability_keys`) automatisch abgeleitet. "
"Der Vektor wird aus Kernfeldern (title/summary/short_description/purpose/execution/notes) berechnet."
),
response_description="Gespeicherter Exercise-Datensatz (Payload-View).",
openapi_extra={
"x-codeSamples": [{
"lang": "bash",
"label": "curl",
"source": "curl -s -X POST http://localhost:8000/exercise -H 'Content-Type: application/json' -d @exercise.json | jq ."
}]
}
)
def create_or_update_exercise(ex: Exercise): def create_or_update_exercise(ex: Exercise):
_ensure_collection() _ensure_collection()
@ -290,7 +390,20 @@ def create_or_update_exercise(ex: Exercise):
return Exercise(**_response_strip_extras(payload)) return Exercise(**_response_strip_extras(payload))
@router.get("/exercise/{exercise_id}", response_model=Exercise) @router.get(
"/exercise/{exercise_id}",
response_model=Exercise,
summary="Übung per interner ID (Qdrant-Punkt-ID) lesen",
description="Scrollt nach `id` und gibt den Payload als Exercise zurück.",
response_description="Exercise-Payload oder 404 bei Nichtfund.",
openapi_extra={
"x-codeSamples": [{
"lang": "bash",
"label": "curl",
"source": "curl -s 'http://localhost:8000/exercise/1234-uuid' | jq ."
}]
}
)
def get_exercise(exercise_id: str): def get_exercise(exercise_id: str):
_ensure_collection() _ensure_collection()
pts, _ = qdrant.scroll( pts, _ = qdrant.scroll(
@ -306,7 +419,32 @@ def get_exercise(exercise_id: str):
return Exercise(**_response_strip_extras(payload)) return Exercise(**_response_strip_extras(payload))
@router.post("/exercise/search", response_model=ExerciseSearchResponse) @router.post(
"/exercise/search",
response_model=ExerciseSearchResponse,
summary="Suche Übungen (Filter + optional Vektor)",
description=(
"Kombinierbare Filter auf Payload-Feldern (`discipline`, `age_group`, `target_group`, `equipment`, `keywords`, "
"`capability_geN/eqN`) und **optional** Vektor-Suche via `query`. "
"`should`-Filter (equipment_any/keywords_any) wirken als OR (minimum_should_match=1). "
"`max_duration` wird als Range (lte) angewandt. Ergebnis enthält bei Vektor-Suche `score`, sonst `null`."
),
response_description="Trefferliste (payload + Score bei Vektor-Suche).",
openapi_extra={
"x-codeSamples": [
{
"lang": "bash",
"label": "Filter",
"source": "curl -s -X POST http://localhost:8000/exercise/search -H 'Content-Type: application/json' -d '{\"discipline\":\"Karate\",\"max_duration\":12,\"equipment_any\":[\"Bälle\"],\"capability_names\":[\"Reaktionsfähigkeit\"],\"capability_ge_level\":2,\"limit\":5}' | jq ."
},
{
"lang": "bash",
"label": "Vektor + Filter",
"source": "curl -s -X POST http://localhost:8000/exercise/search -H 'Content-Type: application/json' -d '{\"query\":\"Aufwärmen 10min Teenager Bälle\",\"discipline\":\"Karate\",\"limit\":3}' | jq ."
}
]
}
)
def search_exercises(req: ExerciseSearchRequest) -> ExerciseSearchResponse: def search_exercises(req: ExerciseSearchRequest) -> ExerciseSearchResponse:
_ensure_collection() _ensure_collection()
flt = _build_filter(req) flt = _build_filter(req)
@ -314,7 +452,6 @@ def search_exercises(req: ExerciseSearchRequest) -> ExerciseSearchResponse:
hits: List[ExerciseSearchHit] = [] hits: List[ExerciseSearchHit] = []
if req.query: if req.query:
vec = _make_vector_from_query(req.query) vec = _make_vector_from_query(req.query)
# qdrant_client.search unterstützt offset/limit
res = qdrant.search( res = qdrant.search(
collection_name=COLLECTION, collection_name=COLLECTION,
query_vector=vec, query_vector=vec,
@ -327,8 +464,7 @@ def search_exercises(req: ExerciseSearchRequest) -> ExerciseSearchResponse:
payload.setdefault("id", str(h.id)) payload.setdefault("id", str(h.id))
hits.append(ExerciseSearchHit(id=str(h.id), score=float(h.score or 0.0), payload=Exercise(**_response_strip_extras(payload)))) hits.append(ExerciseSearchHit(id=str(h.id), score=float(h.score or 0.0), payload=Exercise(**_response_strip_extras(payload))))
else: else:
# Filter-only: per Scroll (ohne Score); einfache Paginierung via offset/limit # Filter-only: Scroll-Paginierung, Score=None
# Hole offset+limit Punkte und simuliere Score=None
collected = 0 collected = 0
skipped = 0 skipped = 0
next_offset = None next_offset = None
@ -357,8 +493,24 @@ def search_exercises(req: ExerciseSearchRequest) -> ExerciseSearchResponse:
return ExerciseSearchResponse(hits=hits) return ExerciseSearchResponse(hits=hits)
@router.delete("/exercise/delete-by-external-id", response_model=DeleteResponse) @router.delete(
def delete_by_external_id(external_id: str = Query(...)): "/exercise/delete-by-external-id",
response_model=DeleteResponse,
summary="Löscht Punkte mit gegebener external_id",
description=(
"Scrollt nach `external_id` und löscht alle passenden Punkte. "
"Idempotent: wenn nichts gefunden → count=0. Vorsicht: **löscht dauerhaft**."
),
response_description="Status + Anzahl gelöschter Punkte.",
openapi_extra={
"x-codeSamples": [{
"lang": "bash",
"label": "curl",
"source": "curl -s -X DELETE 'http://localhost:8000/exercise/delete-by-external-id?external_id=mw:9999' | jq ."
}]
}
)
def delete_by_external_id(external_id: str = Query(..., description="Upsert-Schlüssel, z. B. 'mw:218'")):
_ensure_collection() _ensure_collection()
flt = Filter(must=[FieldCondition(key="external_id", match=MatchValue(value=external_id))]) flt = Filter(must=[FieldCondition(key="external_id", match=MatchValue(value=external_id))])
pts, _ = qdrant.scroll(collection_name=COLLECTION, scroll_filter=flt, limit=10000, with_payload=False) pts, _ = qdrant.scroll(collection_name=COLLECTION, scroll_filter=flt, limit=10000, with_payload=False)
@ -369,8 +521,24 @@ def delete_by_external_id(external_id: str = Query(...)):
return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=COLLECTION) return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=COLLECTION)
@router.delete("/exercise/delete-collection", response_model=DeleteResponse) @router.delete(
def delete_collection(collection: str = Query(default=COLLECTION)): "/exercise/delete-collection",
response_model=DeleteResponse,
summary="Collection komplett löschen",
description=(
"Entfernt die gesamte Collection aus Qdrant. **Gefährlich** – alle Übungen sind danach weg. "
"Nutze nur in Testumgebungen oder für einen kompletten Neuaufbau."
),
response_description="Status. count=0 (nicht relevant beim Drop).",
openapi_extra={
"x-codeSamples": [{
"lang": "bash",
"label": "curl",
"source": "curl -s -X DELETE 'http://localhost:8000/exercise/delete-collection?collection=exercises' | jq ."
}]
}
)
def delete_collection(collection: str = Query(default=COLLECTION, description="Collection-Name (Default: 'exercises')")):
if not qdrant.collection_exists(collection): if not qdrant.collection_exists(collection):
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.") raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
qdrant.delete_collection(collection_name=collection) qdrant.delete_collection(collection_name=collection)
@ -384,7 +552,6 @@ TEST_DOC = """
Speicher als tests/test_exercise_search.py und mit pytest laufen lassen. Speicher als tests/test_exercise_search.py und mit pytest laufen lassen.
import os, requests import os, requests
BASE = os.getenv("API_BASE", "http://localhost:8000") BASE = os.getenv("API_BASE", "http://localhost:8000")
# 1) Filter-only # 1) Filter-only