llm-api/exercise_router.py aktualisiert
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s
This commit is contained in:
parent
a6d68134cd
commit
32577a7fda
|
|
@ -1,11 +1,15 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""
|
"""
|
||||||
exercise_router.py – v1.6.2
|
exercise_router.py – v1.7.0
|
||||||
|
|
||||||
Fix:
|
Neu:
|
||||||
- Entfernt Import von `WithPayloadSelector` (nicht in allen qdrant-client Builds exportiert)
|
- Endpoint **POST /exercise/search**: kombinierbare Filter (discipline, duration, equipment any/all, keywords any/all,
|
||||||
- Scroll-Aufrufe liefern Payload jetzt über `with_payload=True` (breit kompatibel)
|
capability_geN / capability_eqN + names) + optionaler Vektor-Query (query-Text). Ausgabe inkl. Score.
|
||||||
- Rest wie v1.6.1: Capability-Facetten + Listen-Normalisierung, Idempotenz via external_id
|
- Facetten erweitert: neben capability_ge1..ge5 jetzt auch capability_eq1..eq5.
|
||||||
|
- Idempotenz-Fix & Payload-Scroll (aus v1.6.2) beibehalten.
|
||||||
|
- API-Signaturen bestehender Routen unverändert.
|
||||||
|
|
||||||
|
Hinweis: Die „eq/ge“-Felder werden beim Upsert gesetzt; für Alt-Punkte einmal das Backfill laufen lassen.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException, Query
|
from fastapi import APIRouter, HTTPException, Query
|
||||||
|
|
@ -33,10 +37,10 @@ router = APIRouter()
|
||||||
class Exercise(BaseModel):
|
class Exercise(BaseModel):
|
||||||
id: str = Field(default_factory=lambda: str(uuid4()))
|
id: str = Field(default_factory=lambda: str(uuid4()))
|
||||||
# Upsert-Metadaten
|
# Upsert-Metadaten
|
||||||
external_id: Optional[str] = None # z.B. "mw:12345"
|
external_id: Optional[str] = None
|
||||||
fingerprint: Optional[str] = None # sha256 über Kernfelder
|
fingerprint: Optional[str] = None
|
||||||
source: Optional[str] = None # Herkunft, z.B. "MediaWiki"
|
source: Optional[str] = None
|
||||||
imported_at: Optional[datetime] = None # vom Import gesetzt (ISO-String wird akzeptiert)
|
imported_at: Optional[datetime] = None
|
||||||
|
|
||||||
# Domain-Felder
|
# Domain-Felder
|
||||||
title: str
|
title: str
|
||||||
|
|
@ -64,6 +68,37 @@ class DeleteResponse(BaseModel):
|
||||||
count: int
|
count: int
|
||||||
collection: str
|
collection: str
|
||||||
|
|
||||||
|
class ExerciseSearchRequest(BaseModel):
    # Request body of POST /exercise/search. Every filter is optional.

    # --- semantic search + paging ---
    # Free-text query; when set, results are ranked by vector similarity.
    query: Optional[str] = None
    limit: int = Field(default=20, ge=1, le=200)
    offset: int = Field(default=0, ge=0)

    # --- simple equality / range filters ---
    discipline: Optional[str] = None
    target_group: Optional[str] = None
    age_group: Optional[str] = None
    # Upper bound applied to the stored "duration_minutes" payload field.
    max_duration: Optional[int] = Field(default=None, ge=0)

    # --- list filters ---
    # *_any: at least one entry must match; *_all: every entry must match.
    equipment_any: Optional[List[str]] = None
    equipment_all: Optional[List[str]] = None
    keywords_any: Optional[List[str]] = None
    keywords_all: Optional[List[str]] = None

    # --- capabilities (names + level operator) ---
    capability_names: Optional[List[str]] = None
    # Level operators, each restricted to 1..5.
    capability_ge_level: Optional[int] = Field(default=None, ge=1, le=5)
    capability_eq_level: Optional[int] = Field(default=None, ge=1, le=5)
|
||||||
|
|
||||||
|
class ExerciseSearchHit(BaseModel):
    # One search result: point id, optional similarity score, exercise data.
    id: str
    # None for filter-only searches, which produce no similarity score.
    score: Optional[float] = None
    payload: Exercise
|
||||||
|
|
||||||
|
class ExerciseSearchResponse(BaseModel):
    # Response envelope of POST /exercise/search.
    hits: List[ExerciseSearchHit]
|
||||||
|
|
||||||
# =========================
|
# =========================
|
||||||
# Helpers
|
# Helpers
|
||||||
# =========================
|
# =========================
|
||||||
|
|
@ -71,7 +106,6 @@ COLLECTION = os.getenv("EXERCISE_COLLECTION", "exercises")
|
||||||
|
|
||||||
|
|
||||||
def _ensure_collection():
|
def _ensure_collection():
|
||||||
"""Sicherstellen, dass die Collection existiert (kein Drop)."""
|
|
||||||
if not qdrant.collection_exists(COLLECTION):
|
if not qdrant.collection_exists(COLLECTION):
|
||||||
qdrant.recreate_collection(
|
qdrant.recreate_collection(
|
||||||
collection_name=COLLECTION,
|
collection_name=COLLECTION,
|
||||||
|
|
@ -83,7 +117,6 @@ def _ensure_collection():
|
||||||
|
|
||||||
|
|
||||||
def _lookup_by_external_id(external_id: str) -> Optional[Dict[str, Any]]:
|
def _lookup_by_external_id(external_id: str) -> Optional[Dict[str, Any]]:
|
||||||
"""Lookup via Payload-Filter. Liefert die gespeicherte Payload (mit allen Feldern)."""
|
|
||||||
_ensure_collection()
|
_ensure_collection()
|
||||||
flt = Filter(must=[FieldCondition(key="external_id", match=MatchValue(value=external_id))])
|
flt = Filter(must=[FieldCondition(key="external_id", match=MatchValue(value=external_id))])
|
||||||
pts, _ = qdrant.scroll(
|
pts, _ = qdrant.scroll(
|
||||||
|
|
@ -102,14 +135,16 @@ def _lookup_by_external_id(external_id: str) -> Optional[Dict[str, Any]]:
|
||||||
_DEF_EMBED_FIELDS = ("title", "summary", "short_description", "purpose", "execution", "notes")
|
_DEF_EMBED_FIELDS = ("title", "summary", "short_description", "purpose", "execution", "notes")
|
||||||
|
|
||||||
|
|
||||||
def _make_vector_from_exercise(ex: Exercise) -> List[float]:
    """Embed an exercise by encoding its non-empty text fields.

    The fields listed in ``_DEF_EMBED_FIELDS`` are read in order, falsy
    values are skipped, and the rest is joined with ". " before encoding.
    """
    parts = []
    for field_name in _DEF_EMBED_FIELDS:
        value = getattr(ex, field_name, None)
        if value:
            parts.append(value)
    joined = ". ".join(parts)
    return model.encode(joined).tolist()
||||||
|
|
||||||
|
def _make_vector_from_query(query: str) -> List[float]:
    """Embed a free-text search query with the same model used for exercises."""
    embedding = model.encode(query)
    return embedding.tolist()
|
||||||
|
|
||||||
|
|
||||||
def _norm_list(xs: List[Any]) -> List[str]:
|
def _norm_list(xs: List[Any]) -> List[str]:
|
||||||
"""Trim + Duplikate entfernen + sortieren (stabil für Filter & Fingerprint)."""
|
|
||||||
out = []
|
out = []
|
||||||
seen = set()
|
seen = set()
|
||||||
for x in xs or []:
|
for x in xs or []:
|
||||||
|
|
@ -126,35 +161,102 @@ def _norm_list(xs: List[Any]) -> List[str]:
|
||||||
|
|
||||||
def _facet_capabilities(caps: Dict[str, Any]) -> Dict[str, List[str]]:
|
def _facet_capabilities(caps: Dict[str, Any]) -> Dict[str, List[str]]:
|
||||||
caps = caps or {}
|
caps = caps or {}
|
||||||
def ge(n: int) -> List[str]:
|
|
||||||
|
def names_where(pred) -> List[str]:
|
||||||
out = []
|
out = []
|
||||||
for k, v in caps.items():
|
for k, v in caps.items():
|
||||||
try:
|
try:
|
||||||
if int(v) >= n:
|
iv = int(v)
|
||||||
out.append(str(k))
|
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
iv = 0
|
||||||
return sorted({s.strip() for s in out if s.strip()}, key=str.casefold)
|
if pred(iv):
|
||||||
|
t = str(k).strip()
|
||||||
|
if t:
|
||||||
|
out.append(t)
|
||||||
|
return sorted({t for t in out}, key=str.casefold)
|
||||||
|
|
||||||
all_keys = sorted({str(k).strip() for k in caps.keys() if str(k).strip()}, key=str.casefold)
|
all_keys = sorted({str(k).strip() for k in caps.keys() if str(k).strip()}, key=str.casefold)
|
||||||
return {
|
return {
|
||||||
"capability_keys": all_keys,
|
"capability_keys": all_keys,
|
||||||
"capability_ge1": ge(1),
|
# >= N
|
||||||
"capability_ge2": ge(2),
|
"capability_ge1": names_where(lambda lv: lv >= 1),
|
||||||
"capability_ge3": ge(3),
|
"capability_ge2": names_where(lambda lv: lv >= 2),
|
||||||
|
"capability_ge3": names_where(lambda lv: lv >= 3),
|
||||||
|
"capability_ge4": names_where(lambda lv: lv >= 4),
|
||||||
|
"capability_ge5": names_where(lambda lv: lv >= 5),
|
||||||
|
# == N
|
||||||
|
"capability_eq1": names_where(lambda lv: lv == 1),
|
||||||
|
"capability_eq2": names_where(lambda lv: lv == 2),
|
||||||
|
"capability_eq3": names_where(lambda lv: lv == 3),
|
||||||
|
"capability_eq4": names_where(lambda lv: lv == 4),
|
||||||
|
"capability_eq5": names_where(lambda lv: lv == 5),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _response_strip_extras(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of ``payload`` limited to fields declared on ``Exercise``.

    Keys that are not fields of the ``Exercise`` model are dropped from the
    returned dict; ``payload`` itself is not modified.
    """
    declared = Exercise.model_fields
    return {name: payload[name] for name in payload if name in declared}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_filter(req: ExerciseSearchRequest) -> Filter:
    """Translate a search request into a Qdrant payload filter.

    Every populated request field adds one requirement, and all
    requirements are AND-ed together:

    * ``discipline`` / ``target_group`` / ``age_group``: exact match.
    * ``max_duration``: ``duration_minutes <= max_duration``.
    * ``equipment_all`` / ``keywords_all``: every listed value must match.
    * ``equipment_any`` / ``keywords_any``: at least one listed value must
      match. Each any-list is its own OR-group, so a keyword match can no
      longer satisfy an equipment filter (or vice versa).
    * ``capability_names``: each name must be present in the facet field
      selected by ``capability_eq_level`` (takes precedence), else
      ``capability_ge_level``, else ``capability_ge1``.

    Args:
        req: The validated search request.

    Returns:
        A ``Filter`` whose ``must`` list holds all requirements (possibly
        empty when no filter field is set).
    """
    must: List[Any] = []

    def _match(key: str, value: Any) -> FieldCondition:
        return FieldCondition(key=key, match=MatchValue(value=value))

    # Simple equality filters
    for key in ("discipline", "target_group", "age_group"):
        value = getattr(req, key)
        if value:
            must.append(_match(key, value))

    if req.max_duration is not None:
        # Range condition passed as a raw dict so no additional client model
        # has to be imported. NOTE(review): relies on the Filter model
        # coercing the dict into a field condition - confirm against the
        # installed qdrant-client version.
        must.append({"key": "duration_minutes", "range": {"lte": int(req.max_duration)}})

    # List filters: "all" values are AND-ed directly; "any" values form a
    # nested OR-group that is itself one AND-ed requirement.
    for key, all_values, any_values in (
        ("equipment", req.equipment_all, req.equipment_any),
        ("keywords", req.keywords_all, req.keywords_any),
    ):
        must.extend(_match(key, item) for item in all_values or [])
        if any_values:
            must.append(Filter(should=[_match(key, item) for item in any_values]))

    # Capabilities (eq / ge facets)
    if req.capability_names:
        # Strip names so they compare equal to the stripped facet values.
        names = [s.strip() for s in req.capability_names if s and s.strip()]
        if req.capability_eq_level:
            facet_key = f"capability_eq{int(req.capability_eq_level)}"
        elif req.capability_ge_level:
            facet_key = f"capability_ge{int(req.capability_ge_level)}"
        else:
            # Default: level >= 1, i.e. the capability is present at all.
            facet_key = "capability_ge1"
        must.extend(_match(facet_key, name) for name in names)

    return Filter(must=must)
|
||||||
|
|
||||||
# =========================
|
# =========================
|
||||||
# Endpoints
|
# Endpoints
|
||||||
# =========================
|
# =========================
|
||||||
@router.get("/exercise/by-external-id")
|
@router.get("/exercise/by-external-id")
|
||||||
def get_exercise_by_external_id(external_id: str = Query(..., min_length=3)):
|
def get_exercise_by_external_id(external_id: str = Query(..., min_length=3)):
|
||||||
"""Lookup für Idempotenz im Importer. Liefert 404, wenn nicht vorhanden."""
|
|
||||||
found = _lookup_by_external_id(external_id)
|
found = _lookup_by_external_id(external_id)
|
||||||
if not found:
|
if not found:
|
||||||
raise HTTPException(status_code=404, detail="not found")
|
raise HTTPException(status_code=404, detail="not found")
|
||||||
|
|
@ -163,34 +265,23 @@ def get_exercise_by_external_id(external_id: str = Query(..., min_length=3)):
|
||||||
|
|
||||||
@router.post("/exercise", response_model=Exercise)
|
@router.post("/exercise", response_model=Exercise)
|
||||||
def create_or_update_exercise(ex: Exercise):
|
def create_or_update_exercise(ex: Exercise):
|
||||||
"""
|
|
||||||
Upsert-Semantik. Wenn `external_id` existiert und bereits in Qdrant gefunden wird,
|
|
||||||
wird dieselbe Point-ID überschrieben (echtes Update). Ansonsten neuer Eintrag.
|
|
||||||
API-Signatur bleibt identisch (POST /exercise, Body = Exercise).
|
|
||||||
"""
|
|
||||||
_ensure_collection()
|
_ensure_collection()
|
||||||
|
|
||||||
# Bestehende Point-ID übernehmen, falls external_id bereits vorhanden ist
|
|
||||||
point_id = ex.id
|
point_id = ex.id
|
||||||
if ex.external_id:
|
if ex.external_id:
|
||||||
prior = _lookup_by_external_id(ex.external_id)
|
prior = _lookup_by_external_id(ex.external_id)
|
||||||
if prior:
|
if prior:
|
||||||
point_id = prior.get("id", point_id)
|
point_id = prior.get("id", point_id)
|
||||||
|
|
||||||
# Embedding
|
vector = _make_vector_from_exercise(ex)
|
||||||
vector = _make_vector(ex)
|
|
||||||
|
|
||||||
# Payload stabilisieren + Facetten einfügen
|
|
||||||
payload: Dict[str, Any] = ex.model_dump()
|
payload: Dict[str, Any] = ex.model_dump()
|
||||||
payload["id"] = str(point_id)
|
payload["id"] = str(point_id)
|
||||||
payload["keywords"] = _norm_list(payload.get("keywords") or [])
|
payload["keywords"] = _norm_list(payload.get("keywords") or [])
|
||||||
payload["equipment"] = _norm_list(payload.get("equipment") or [])
|
payload["equipment"] = _norm_list(payload.get("equipment") or [])
|
||||||
|
|
||||||
facet = _facet_capabilities(payload.get("capabilities") or {})
|
payload.update(_facet_capabilities(payload.get("capabilities") or {}))
|
||||||
# Extra-Felder nur im gespeicherten Payload verwenden (für Filter), nicht in der Response
|
|
||||||
payload.update(facet)
|
|
||||||
|
|
||||||
# Upsert in Qdrant
|
|
||||||
qdrant.upsert(
|
qdrant.upsert(
|
||||||
collection_name=COLLECTION,
|
collection_name=COLLECTION,
|
||||||
points=[PointStruct(id=str(point_id), vector=vector, payload=payload)],
|
points=[PointStruct(id=str(point_id), vector=vector, payload=payload)],
|
||||||
|
|
@ -215,6 +306,57 @@ def get_exercise(exercise_id: str):
|
||||||
return Exercise(**_response_strip_extras(payload))
|
return Exercise(**_response_strip_extras(payload))
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/exercise/search", response_model=ExerciseSearchResponse)
def search_exercises(req: ExerciseSearchRequest) -> ExerciseSearchResponse:
    """Search exercises by payload filter, optionally ranked by a text query.

    With ``req.query`` set, the query is embedded and a vector search is run
    with the filter, ``limit`` and ``offset``; hits carry a score. Without a
    query, matching points are read via scroll, the first ``req.offset``
    matches are skipped, and hits carry ``score=None``.
    """
    _ensure_collection()
    flt = _build_filter(req)

    def _to_hit(point, score) -> ExerciseSearchHit:
        # Make sure the payload has an id, then keep only Exercise fields.
        data = dict(point.payload or {})
        data.setdefault("id", str(point.id))
        return ExerciseSearchHit(
            id=str(point.id),
            score=score,
            payload=Exercise(**_response_strip_extras(data)),
        )

    hits: List[ExerciseSearchHit] = []

    if req.query:
        # Vector search: limit/offset are handled by the client call.
        scored_points = qdrant.search(
            collection_name=COLLECTION,
            query_vector=_make_vector_from_query(req.query),
            limit=req.limit,
            offset=req.offset,
            query_filter=flt,
        )
        hits.extend(_to_hit(sp, float(sp.score or 0.0)) for sp in scored_points)
        return ExerciseSearchResponse(hits=hits)

    # Filter-only: page through scroll results and emulate offset/limit here.
    remaining_skip = req.offset
    cursor = None
    while len(hits) < req.limit:
        # Ask only for what is still needed (skips + hits), capped at 256.
        wanted = req.limit - len(hits) + remaining_skip
        page, cursor = qdrant.scroll(
            collection_name=COLLECTION,
            scroll_filter=flt,
            offset=cursor,
            limit=max(1, min(256, wanted)),
            with_payload=True,
        )
        if not page:
            break
        for pt in page:
            if remaining_skip > 0:
                remaining_skip -= 1
                continue
            hits.append(_to_hit(pt, None))
            if len(hits) >= req.limit:
                break
        if cursor is None:
            # No further pages available.
            break

    return ExerciseSearchResponse(hits=hits)
|
||||||
|
|
||||||
|
|
||||||
@router.delete("/exercise/delete-by-external-id", response_model=DeleteResponse)
|
@router.delete("/exercise/delete-by-external-id", response_model=DeleteResponse)
|
||||||
def delete_by_external_id(external_id: str = Query(...)):
|
def delete_by_external_id(external_id: str = Query(...)):
|
||||||
_ensure_collection()
|
_ensure_collection()
|
||||||
|
|
@ -233,3 +375,41 @@ def delete_collection(collection: str = Query(default=COLLECTION)):
|
||||||
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
|
raise HTTPException(status_code=404, detail=f"Collection '{collection}' nicht gefunden.")
|
||||||
qdrant.delete_collection(collection_name=collection)
|
qdrant.delete_collection(collection_name=collection)
|
||||||
return DeleteResponse(status="🗑️ gelöscht", count=0, collection=collection)
|
return DeleteResponse(status="🗑️ gelöscht", count=0, collection=collection)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------
|
||||||
|
# OPTIONAL: einfacher Selbsttest (kannst du auch separat als Script verwenden)
|
||||||
|
# ---------------------------
|
||||||
|
TEST_DOC = """
|
||||||
|
Speicher als tests/test_exercise_search.py und mit pytest laufen lassen.
|
||||||
|
|
||||||
|
import os, requests
|
||||||
|
|
||||||
|
BASE = os.getenv("API_BASE", "http://localhost:8000")
|
||||||
|
|
||||||
|
# 1) Filter-only
|
||||||
|
r = requests.post(f"{BASE}/exercise/search", json={
|
||||||
|
"discipline": "Karate",
|
||||||
|
"max_duration": 12,
|
||||||
|
"equipment_any": ["Bälle"],
|
||||||
|
"capability_names": ["Reaktionsfähigkeit"],
|
||||||
|
"capability_ge_level": 2,
|
||||||
|
"limit": 5
|
||||||
|
})
|
||||||
|
r.raise_for_status()
|
||||||
|
js = r.json()
|
||||||
|
assert "hits" in js
|
||||||
|
for h in js["hits"]:
|
||||||
|
p = h["payload"]
|
||||||
|
assert p["discipline"] == "Karate"
|
||||||
|
assert p["duration_minutes"] <= 12
|
||||||
|
|
||||||
|
# 2) Vector + Filter
|
||||||
|
r = requests.post(f"{BASE}/exercise/search", json={
|
||||||
|
"query": "Aufwärmen 10min, Reaktionsfähigkeit, Teenager, Bälle",
|
||||||
|
"discipline": "Karate",
|
||||||
|
"limit": 3
|
||||||
|
})
|
||||||
|
r.raise_for_status()
|
||||||
|
js = r.json(); assert len(js["hits"]) <= 3
|
||||||
|
"""
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user