diff --git a/llm-api/exercise_router.py b/llm-api/exercise_router.py index 8785887..41df31f 100644 --- a/llm-api/exercise_router.py +++ b/llm-api/exercise_router.py @@ -1,15 +1,17 @@ # -*- coding: utf-8 -*- """ -exercise_router.py – v1.7.0 +exercise_router.py – v1.7.1 (Swagger angereichert) -Neu: -- Endpoint **POST /exercise/search**: kombinierbare Filter (discipline, duration, equipment any/all, keywords any/all, - capability_geN / capability_eqN + names) + optionaler Vektor-Query (query-Text). Ausgabe inkl. Score. -- Facetten erweitert: neben capability_ge1..ge5 jetzt auch capability_eq1..eq5. -- Idempotenz-Fix & Payload-Scroll (aus v1.6.2) beibehalten. -- API-Signaturen bestehender Routen unverändert. +Ergänzt: +- Aussagekräftige summary/description/response_description je Endpoint +- Beispiele (x-codeSamples) für curl-Aufrufe +- Pydantic-Felder mit description + json_schema_extra (Beispiele) +- Keine API-Signatur-/Pfadänderungen, keine Prefix-Änderungen -Hinweis: Die „eq/ge“-Felder werden beim Upsert gesetzt; für Alt-Punkte einmal das Backfill laufen lassen. +Hinweis: +- Endpunkte bleiben weiterhin unter /exercise/* (weil die Routenstrings bereits /exercise/... enthalten). +- Falls du später einen APIRouter-Prefix setzen willst, dann bitte die Pfade unten von '/exercise/...' auf relative Pfade ändern, + sonst entstehen Doppelpfade. """ from fastapi import APIRouter, HTTPException, Query @@ -27,77 +29,137 @@ from qdrant_client.models import ( FieldCondition, MatchValue, ) +import logging import os -router = APIRouter() +logger = logging.getLogger("exercise_router") +logger.setLevel(logging.INFO) + +# Router ohne prefix (Pfadstrings enthalten bereits '/exercise/...') +router = APIRouter(tags=["exercise"]) # ========================= # Models # ========================= class Exercise(BaseModel): - id: str = Field(default_factory=lambda: str(uuid4())) + id: str = Field(default_factory=lambda: str(uuid4()), description="Interne UUID (Qdrant-Punkt-ID)") # Upsert-Metadaten - external_id: Optional[str] = None - fingerprint: Optional[str] = None - source: Optional[str] = None - imported_at: Optional[datetime] = None + external_id: Optional[str] = Field(default=None, description="Upsert-Schlüssel (z. B. 'mw:{pageid}')") + fingerprint: Optional[str] = Field(default=None, description="sha256 der Kernfelder für Idempotenz/Diff") + source: Optional[str] = Field(default=None, description="Quelle (z. B. 'mediawiki', 'pdf-import', …)") + imported_at: Optional[datetime] = Field(default=None, description="Zeitpunkt des Imports (ISO-8601)") # Domain-Felder - title: str - summary: str - short_description: str - keywords: List[str] = [] - link: Optional[str] = None - discipline: str - group: Optional[str] = None - age_group: str - target_group: str - min_participants: int - duration_minutes: int - capabilities: Dict[str, int] = {} - category: str - purpose: str - execution: str - notes: str - preparation: str - method: str - equipment: List[str] = [] + title: str = Field(..., description="Übungstitel") + summary: str = Field(..., description="Kurzbeschreibung/Ziel der Übung") + short_description: str = Field(..., description="Alternative Kurzform / Teaser") + keywords: List[str] = Field(default_factory=list, description="Freie Schlagworte (normalisiert)") + link: Optional[str] = Field(default=None, description="Kanonsiche URL/Permalink zur Quelle") + discipline: str = Field(..., description="Disziplin (z. B. Karate)") + group: Optional[str] = Field(default=None, description="Optionale Gruppierung/Kategorie") + age_group: str = Field(..., description="Altersgruppe (z. B. Kinder/Schüler/Teenager/Erwachsene)") + target_group: str = Field(..., description="Zielgruppe (z. B. Breitensportler)") + min_participants: int = Field(..., ge=0, description="Minimale Gruppenstärke") + duration_minutes: int = Field(..., ge=0, description="Dauer in Minuten") + capabilities: Dict[str, int] = Field(default_factory=dict, description="Fähigkeiten-Map: {Name: Level 1..5}") + category: str = Field(..., description="Abschnitt / Kategorie (z. B. Aufwärmen, Grundschule, …)") + purpose: str = Field(..., description="Zweck/Zielabsicht") + execution: str = Field(..., description="Durchführungsschritte (Markdown/Wiki-ähnlich)") + notes: str = Field(..., description="Hinweise/Coaching-Cues") + preparation: str = Field(..., description="Vorbereitung/Material") + method: str = Field(..., description="Methodik/Didaktik") + equipment: List[str] = Field(default_factory=list, description="Benötigte Hilfsmittel") + + model_config = { + "json_schema_extra": { + "example": { + "external_id": "mw:218", + "title": "Affenklatschen", + "summary": "Koordination & Aufmerksamkeit mit Ballwechseln", + "short_description": "Ballgewöhnung im Stand/Gehen/Laufen", + "keywords": ["Hand-Auge-Koordination", "Reaktion"], + "link": "https://www.karatetrainer.de/index.php?title=Affenklatschen", + "discipline": "Karate", + "age_group": "Teenager", + "target_group": "Breitensportler", + "min_participants": 4, + "duration_minutes": 8, + "capabilities": {"Reaktionsfähigkeit": 2, "Kopplungsfähigkeit": 2}, + "category": "Aufwärmen", + "purpose": "Aufmerksamkeit & Reaktionskette aktivieren", + "execution": "* Paarweise aufstellen …", + "notes": "* nicht zu lange werden lassen", + "preparation": "* Bälle bereit halten", + "method": "* klare Regeln/Strafrunde", + "equipment": ["Bälle"] + } + } + } class DeleteResponse(BaseModel): - status: str - count: int - collection: str + status: str = Field(..., description="Statusmeldung") + count: int = Field(..., ge=0, description="Anzahl betroffener Punkte") + collection: str = Field(..., description="Qdrant-Collection-Name") class ExerciseSearchRequest(BaseModel): # Optionaler Semantik-Query (Vektor) - query: Optional[str] = None - limit: int = Field(default=20, ge=1, le=200) - offset: int = Field(default=0, ge=0) + query: Optional[str] = Field(default=None, description="Freitext für Vektor-Suche (optional)") + limit: int = Field(default=20, ge=1, le=200, description="Max. Treffer") + offset: int = Field(default=0, ge=0, description="Offset/Pagination") # Einfache Filter - discipline: Optional[str] = None - target_group: Optional[str] = None - age_group: Optional[str] = None - max_duration: Optional[int] = Field(default=None, ge=0) + discipline: Optional[str] = Field(default=None, description="z. B. Karate") + target_group: Optional[str] = Field(default=None, description="z. B. Breitensportler") + age_group: Optional[str] = Field(default=None, description="z. B. Teenager") + max_duration: Optional[int] = Field(default=None, ge=0, description="Obergrenze Minuten") # Listen-Filter - equipment_any: Optional[List[str]] = None # mindestens eins muss passen - equipment_all: Optional[List[str]] = None # alle müssen passen - keywords_any: Optional[List[str]] = None - keywords_all: Optional[List[str]] = None + equipment_any: Optional[List[str]] = Field(default=None, description="Mind. eines muss passen") + equipment_all: Optional[List[str]] = Field(default=None, description="Alle müssen passen") + keywords_any: Optional[List[str]] = Field(default=None, description="Mind. eines muss passen") + keywords_all: Optional[List[str]] = Field(default=None, description="Alle müssen passen") # Capabilities (Namen + Level-Operator) - capability_names: Optional[List[str]] = None - capability_ge_level: Optional[int] = Field(default=None, ge=1, le=5) - capability_eq_level: Optional[int] = Field(default=None, ge=1, le=5) + capability_names: Optional[List[str]] = Field(default=None, description="Capability-Bezeichnungen") + capability_ge_level: Optional[int] = Field(default=None, ge=1, le=5, description="Level ≥ N") + capability_eq_level: Optional[int] = Field(default=None, ge=1, le=5, description="Level == N") + + model_config = { + "json_schema_extra": { + "examples": [{ + "discipline": "Karate", + "max_duration": 12, + "equipment_any": ["Bälle"], + "capability_names": ["Reaktionsfähigkeit"], + "capability_ge_level": 2, + "limit": 5 + }, { + "query": "Aufwärmen Reaktionsfähigkeit 10min Teenager Bälle", + "discipline": "Karate", + "limit": 3 + }] + } + } class ExerciseSearchHit(BaseModel): - id: str - score: Optional[float] = None - payload: Exercise + id: str = Field(..., description="Qdrant-Punkt-ID") + score: Optional[float] = Field(default=None, description="Ähnlichkeitsscore (nur bei Vektor-Suche)") + payload: Exercise = Field(..., description="Übungsdaten (Payload)") class ExerciseSearchResponse(BaseModel): - hits: List[ExerciseSearchHit] + hits: List[ExerciseSearchHit] = Field(..., description="Trefferliste") + + model_config = { + "json_schema_extra": { + "example": { + "hits": [{ + "id": "c1f1-…", + "score": 0.78, + "payload": Exercise.model_config["json_schema_extra"]["example"] + }] + } + } + } # ========================= # Helpers @@ -160,6 +222,12 @@ def _norm_list(xs: List[Any]) -> List[str]: def _facet_capabilities(caps: Dict[str, Any]) -> Dict[str, List[str]]: + """ + Leitet Facettenfelder aus der capabilities-Map ab: + - capability_keys: alle Namen + - capability_geN: Namen mit Level >= N (1..5) + - capability_eqN: Namen mit Level == N (1..5) + """ caps = caps or {} def names_where(pred) -> List[str]: @@ -194,6 +262,7 @@ def _facet_capabilities(caps: Dict[str, Any]) -> Dict[str, List[str]]: def _response_strip_extras(payload: Dict[str, Any]) -> Dict[str, Any]: + # Nur definierte Exercise-Felder zurückgeben (saubere API) allowed = set(Exercise.model_fields.keys()) return {k: v for k, v in payload.items() if k in allowed} @@ -209,8 +278,7 @@ def _build_filter(req: ExerciseSearchRequest) -> Filter: if req.age_group: must.append(FieldCondition(key="age_group", match=MatchValue(value=req.age_group))) if req.max_duration is not None: - # Range ohne Import zusätzlicher Modelle: Qdrant akzeptiert auch {'range': {'lte': n}} per JSON; - # über Client-Modell tun wir es hier nicht, da wir Filter primär für Keyword-Felder nutzen. + # Range in Qdrant: über rohen JSON-Range-Ausdruck (Client-Modell hat keinen Komfort-Wrapper) must.append({"key": "duration_minutes", "range": {"lte": int(req.max_duration)}}) # equipment @@ -218,7 +286,6 @@ def _build_filter(req: ExerciseSearchRequest) -> Filter: for it in req.equipment_all: must.append(FieldCondition(key="equipment", match=MatchValue(value=it))) if req.equipment_any: - # OR: über 'should' Liste for it in req.equipment_any: should.append(FieldCondition(key="equipment", match=MatchValue(value=it))) @@ -248,22 +315,55 @@ def _build_filter(req: ExerciseSearchRequest) -> Filter: flt = Filter(must=must) if should: - # qdrant: 'should' mit implizitem minimum_should_match=1 + # Qdrant: 'should' entspricht OR mit minimum_should_match=1 flt.should = should return flt # ========================= # Endpoints # ========================= -@router.get("/exercise/by-external-id") -def get_exercise_by_external_id(external_id: str = Query(..., min_length=3)): +@router.get( + "/exercise/by-external-id", + summary="Übung per external_id abrufen", + description=( + "Liefert die Übung mit der gegebenen `external_id` (z. B. `mw:{pageid}`). " + "Verwendet einen Qdrant-Filter auf dem Payload-Feld `external_id`." + ), + response_description="Vollständiger Exercise-Payload oder 404 bei Nichtfund.", + openapi_extra={ + "x-codeSamples": [{ + "lang": "bash", + "label": "curl", + "source": "curl -s 'http://localhost:8000/exercise/by-external-id?external_id=mw:218' | jq ." + }] + } +) +def get_exercise_by_external_id(external_id: str = Query(..., min_length=3, description="Upsert-Schlüssel, z. B. 'mw:218'")): found = _lookup_by_external_id(external_id) if not found: raise HTTPException(status_code=404, detail="not found") return found -@router.post("/exercise", response_model=Exercise) +@router.post( + "/exercise", + response_model=Exercise, + summary="Create/Update (idempotent per external_id)", + description=( + "Legt eine Übung an oder aktualisiert sie. Wenn `external_id` vorhanden und bereits in der Collection existiert, " + "wird **Update** auf dem bestehenden Punkt ausgeführt (Upsert). `keywords`/`equipment` werden normalisiert, " + "Capability-Facetten (`capability_ge1..5`, `capability_eq1..5`, `capability_keys`) automatisch abgeleitet. " + "Der Vektor wird aus Kernfeldern (title/summary/short_description/purpose/execution/notes) berechnet." + ), + response_description="Gespeicherter Exercise-Datensatz (Payload-View).", + openapi_extra={ + "x-codeSamples": [{ + "lang": "bash", + "label": "curl", + "source": "curl -s -X POST http://localhost:8000/exercise -H 'Content-Type: application/json' -d @exercise.json | jq ." + }] + } +) def create_or_update_exercise(ex: Exercise): _ensure_collection() @@ -290,7 +390,20 @@ def create_or_update_exercise(ex: Exercise): return Exercise(**_response_strip_extras(payload)) -@router.get("/exercise/{exercise_id}", response_model=Exercise) +@router.get( + "/exercise/{exercise_id}", + response_model=Exercise, + summary="Übung per interner ID (Qdrant-Punkt-ID) lesen", + description="Scrollt nach `id` und gibt den Payload als Exercise zurück.", + response_description="Exercise-Payload oder 404 bei Nichtfund.", + openapi_extra={ + "x-codeSamples": [{ + "lang": "bash", + "label": "curl", + "source": "curl -s 'http://localhost:8000/exercise/1234-uuid' | jq ." + }] + } +) def get_exercise(exercise_id: str): _ensure_collection() pts, _ = qdrant.scroll( @@ -306,7 +419,32 @@ def get_exercise(exercise_id: str): return Exercise(**_response_strip_extras(payload)) -@router.post("/exercise/search", response_model=ExerciseSearchResponse) +@router.post( + "/exercise/search", + response_model=ExerciseSearchResponse, + summary="Suche Übungen (Filter + optional Vektor)", + description=( + "Kombinierbare Filter auf Payload-Feldern (`discipline`, `age_group`, `target_group`, `equipment`, `keywords`, " + "`capability_geN/eqN`) und **optional** Vektor-Suche via `query`. " + "`should`-Filter (equipment_any/keywords_any) wirken als OR (minimum_should_match=1). " + "`max_duration` wird als Range (lte) angewandt. Ergebnis enthält bei Vektor-Suche `score`, sonst `null`." + ), + response_description="Trefferliste (payload + Score bei Vektor-Suche).", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "bash", + "label": "Filter", + "source": "curl -s -X POST http://localhost:8000/exercise/search -H 'Content-Type: application/json' -d '{\"discipline\":\"Karate\",\"max_duration\":12,\"equipment_any\":[\"Bälle\"],\"capability_names\":[\"Reaktionsfähigkeit\"],\"capability_ge_level\":2,\"limit\":5}' | jq ." + }, + { + "lang": "bash", + "label": "Vektor + Filter", + "source": "curl -s -X POST http://localhost:8000/exercise/search -H 'Content-Type: application/json' -d '{\"query\":\"Aufwärmen 10min Teenager Bälle\",\"discipline\":\"Karate\",\"limit\":3}' | jq ." + } + ] + } +) def search_exercises(req: ExerciseSearchRequest) -> ExerciseSearchResponse: _ensure_collection() flt = _build_filter(req) @@ -314,7 +452,6 @@ def search_exercises(req: ExerciseSearchRequest) -> ExerciseSearchResponse: hits: List[ExerciseSearchHit] = [] if req.query: vec = _make_vector_from_query(req.query) - # qdrant_client.search unterstützt offset/limit res = qdrant.search( collection_name=COLLECTION, query_vector=vec, @@ -327,8 +464,7 @@ def search_exercises(req: ExerciseSearchRequest) -> ExerciseSearchResponse: payload.setdefault("id", str(h.id)) hits.append(ExerciseSearchHit(id=str(h.id), score=float(h.score or 0.0), payload=Exercise(**_response_strip_extras(payload)))) else: - # Filter-only: per Scroll (ohne Score); einfache Paginierung via offset/limit - # Hole offset+limit Punkte und simuliere Score=None + # Filter-only: Scroll-Paginierung, Score=None collected = 0 skipped = 0 next_offset = None @@ -357,8 +493,24 @@ def search_exercises(req: ExerciseSearchRequest) -> ExerciseSearchResponse: return ExerciseSearchResponse(hits=hits) -@router.delete("/exercise/delete-by-external-id", response_model=DeleteResponse) -def delete_by_external_id(external_id: str = Query(...)): +@router.delete( + "/exercise/delete-by-external-id", + response_model=DeleteResponse, + summary="Löscht Punkte mit gegebener external_id", + description=( + "Scrollt nach `external_id` und löscht alle passenden Punkte. " + "Idempotent: wenn nichts gefunden → count=0. Vorsicht: **löscht dauerhaft**." + ), + response_description="Status + Anzahl gelöschter Punkte.", + openapi_extra={ + "x-codeSamples": [{ + "lang": "bash", + "label": "curl", + "source": "curl -s 'http://localhost:8000/exercise/delete-by-external-id?external_id=mw:9999' | jq ." + }] + } +) +def delete_by_external_id(external_id: str = Query(..., description="Upsert-Schlüssel, z. B. 'mw:218'")): _ensure_collection() flt = Filter(must=[FieldCondition(key="external_id", match=MatchValue(value=external_id))]) pts, _ = qdrant.scroll(collection_name=COLLECTION, scroll_filter=flt, limit=10000, with_payload=False) @@ -369,8 +521,24 @@ def delete_by_external_id(external_id: str = Query(...)): return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=COLLECTION) -@router.delete("/exercise/delete-collection", response_model=DeleteResponse) -def delete_collection(collection: str = Query(default=COLLECTION)): +@router.delete( + "/exercise/delete-collection", + response_model=DeleteResponse, + summary="Collection komplett löschen", + description=( + "Entfernt die gesamte Collection aus Qdrant. **Gefährlich** – alle Übungen sind danach weg. " + "Nutze nur in Testumgebungen oder für einen kompletten Neuaufbau." + ), + response_description="Status. count=0 (nicht relevant beim Drop).", + openapi_extra={ + "x-codeSamples": [{ + "lang": "bash", + "label": "curl", + "source": "curl -s 'http://localhost:8000/exercise/delete-collection?collection=exercises' | jq ." + }] + } +) +def delete_collection(collection: str = Query(default=COLLECTION, description="Collection-Name (Default: 'exercises')")): if not qdrant.collection_exists(collection): raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.") qdrant.delete_collection(collection_name=collection) @@ -384,7 +552,6 @@ TEST_DOC = """ Speicher als tests/test_exercise_search.py und mit pytest laufen lassen. import os, requests - BASE = os.getenv("API_BASE", "http://localhost:8000") # 1) Filter-only