From d9eefcb1faba0aef31384f531451f7d3f26292ed Mon Sep 17 00:00:00 2001 From: Lars Date: Wed, 13 Aug 2025 07:39:31 +0200 Subject: [PATCH] llm-api/plan_router.py aktualisiert --- llm-api/plan_router.py | 338 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 328 insertions(+), 10 deletions(-) diff --git a/llm-api/plan_router.py b/llm-api/plan_router.py index 359a9aa..b38c238 100644 --- a/llm-api/plan_router.py +++ b/llm-api/plan_router.py @@ -1,14 +1,15 @@ # -*- coding: utf-8 -*- """ -plan_router.py – v0.12.2 (WP-15) +plan_router.py – v0.12.3 (WP-15) Minimal-CRUD für Plan-Templates & Pläne (POST/GET) + Idempotenz via Fingerprint. -NEU/Änderungen ggü. v0.12.0: -- GET /plan_templates (Liste/Filter) -- GET /plans (Liste/Filter) -- Fix: Filter `section` bei `/plans` nutzt materialisierte `plan_section_names` -- POST /plan materialisiert `plan_section_names` automatisch -- Ausführlichere Swagger-Doku + +Änderungen ggü. v0.12.0/0.12.1/0.12.2: +- NEU: GET /plan_templates – Liste & Filter (discipline, age_group, target_group, section, goal, keyword) +- NEU: GET /plans – Liste & Filter (created_by, discipline, age_group, target_group, goal, section, created_from/created_to) +- FIX/NEU: POST /plan materialisiert `plan_section_names` für robuste Section-Filter & Index +- Beibeh.: Idempotenz via fingerprint, optionale Strict-Checks (template/exercises) +- Swagger-Doku präzisiert """ from fastapi import APIRouter, HTTPException, Query from pydantic import BaseModel, Field @@ -39,8 +40,8 @@ class TemplateSection(BaseModel): name: str target_minutes: int must_keywords: List[str] = [] - ideal_keywords: List[str] = [] - supplement_keywords: List[str] = [] + ideal_keywords: List[str] = [] # optional (wünschenswert) + supplement_keywords: List[str] = [] # optional (ergänzend) forbid_keywords: List[str] = [] capability_targets: Dict[str, int] = {} @@ -100,6 +101,7 @@ class PlanList(BaseModel): # Helpers # ----------------- def _ensure_collection(name: str): + """Legt Collection an, wenn sie fehlt (analog exercise_router).""" if not qdrant.collection_exists(name): qdrant.recreate_collection( collection_name=name, @@ -107,6 +109,7 @@ def _ensure_collection(name: str): ) def _norm_list(xs: List[str]) -> List[str]: + """Trimmen, casefolded deduplizieren, stabil sortieren.""" seen, out = set(), [] for x in xs or []: s = str(x).strip() @@ -117,4 +120,319 @@ def _norm_list(xs: List[str]) -> List[str]: return sorted(out, key=str.casefold) def _template_embed_text(tpl: PlanTemplate) -> str: - par + parts = [tpl.name, tpl.discipline, tpl.age_group, tpl.target_group] + parts += tpl.goals + parts += [s.name for s in tpl.sections] + return ". ".join([p for p in parts if p]) + +def _plan_embed_text(p: Plan) -> str: + parts = [p.title, p.discipline, p.age_group, p.target_group] + parts += p.goals + parts += [s.name for s in p.sections] + return ". ".join([p for p in parts if p]) + +def _embed(text: str): + return model.encode(text or "").tolist() + +def _fingerprint_for_plan(p: Plan) -> str: + """sha256(title, total_minutes, sections.items.exercise_external_id, sections.items.duration)""" + core = { + "title": p.title, + "total_minutes": int(p.total_minutes), + "items": [ + {"exercise_external_id": it.exercise_external_id, "duration": int(it.duration)} + for sec in p.sections + for it in (sec.items or []) + ], + } + raw = json.dumps(core, sort_keys=True, ensure_ascii=False) + return hashlib.sha256(raw.encode("utf-8")).hexdigest() + +def _get_by_field(collection: str, key: str, value: Any) -> Optional[Dict[str, Any]]: + flt = Filter(must=[FieldCondition(key=key, match=MatchValue(value=value))]) + pts, _ = qdrant.scroll(collection_name=collection, scroll_filter=flt, limit=1, with_payload=True) + if not pts: + return None + payload = dict(pts[0].payload or {}) + payload.setdefault("id", str(pts[0].id)) + return payload + +def _as_model(model_cls, payload: Dict[str, Any]): + """Filtert unbekannte Payload-Felder heraus (Pydantic v1/v2 kompatibel).""" + fields = getattr(model_cls, "model_fields", None) or getattr(model_cls, "__fields__", {}) + allowed = set(fields.keys()) + data = {k: payload[k] for k in payload.keys() if k in allowed} + return model_cls(**data) + +def _truthy(val: Optional[str]) -> bool: + return str(val or "").strip().lower() in {"1", "true", "yes", "on"} + +def _exists_in_collection(collection: str, key: str, value: Any) -> bool: + flt = Filter(must=[FieldCondition(key=key, match=MatchValue(value=value))]) + pts, _ = qdrant.scroll(collection_name=collection, scroll_filter=flt, limit=1, with_payload=False) + return bool(pts) + +# ----------------- +# Endpoints: Templates +# ----------------- +@router.post( + "/plan_templates", + response_model=PlanTemplate, + summary="Create a plan template", + description=( + "Erstellt ein Plan-Template (Strukturplanung).\n\n" + "• Mehrere Sections erlaubt.\n" + "• Section-Felder: must/ideal/supplement/forbid keywords + capability_targets.\n" + "• Materialisierte Facettenfelder (section_*) werden intern geschrieben, um Qdrant-Filter zu beschleunigen." + ), +) +def create_plan_template(t: PlanTemplate): + _ensure_collection(PLAN_TEMPLATE_COLLECTION) + payload = t.model_dump() + payload["goals"] = _norm_list(payload.get("goals")) + sections = payload.get("sections", []) or [] + for s in sections: + s["must_keywords"] = _norm_list(s.get("must_keywords") or []) + s["ideal_keywords"] = _norm_list(s.get("ideal_keywords") or []) + s["supplement_keywords"] = _norm_list(s.get("supplement_keywords") or []) + s["forbid_keywords"] = _norm_list(s.get("forbid_keywords") or []) + + # Materialisierte Facetten (KEYWORD-Indizes) + payload["section_names"] = _norm_list([s.get("name", "") for s in sections]) + payload["section_must_keywords"] = _norm_list([kw for s in sections for kw in (s.get("must_keywords") or [])]) + payload["section_ideal_keywords"] = _norm_list([kw for s in sections for kw in (s.get("ideal_keywords") or [])]) + payload["section_supplement_keywords"] = _norm_list([kw for s in sections for kw in (s.get("supplement_keywords") or [])]) + payload["section_forbid_keywords"] = _norm_list([kw for s in sections for kw in (s.get("forbid_keywords") or [])]) + + vec = _embed(_template_embed_text(t)) + qdrant.upsert(collection_name=PLAN_TEMPLATE_COLLECTION, points=[PointStruct(id=str(t.id), vector=vec, payload=payload)]) + return t + +@router.get( + "/plan_templates/{tpl_id}", + response_model=PlanTemplate, + summary="Read a plan template by id", + description="Liest ein Template anhand seiner ID und gibt nur die Schemafelder zurück (zusätzliche Payload wird herausgefiltert).", +) +def get_plan_template(tpl_id: str): + _ensure_collection(PLAN_TEMPLATE_COLLECTION) + found = _get_by_field(PLAN_TEMPLATE_COLLECTION, "id", tpl_id) + if not found: + raise HTTPException(status_code=404, detail="not found") + return _as_model(PlanTemplate, found) + +@router.get( + "/plan_templates", + response_model=PlanTemplateList, + summary="List plan templates (filterable)", + description=( + "Listet Plan-Templates mit Filtern.\n\n" + "**Filter** (exakte Matches, KEYWORD-Felder):\n" + "- discipline, age_group, target_group\n" + "- section: Section-Name (nutzt materialisierte `section_names`)\n" + "- goal: Ziel (nutzt `goals`)\n" + "- keyword: trifft auf beliebige Section-Keyword-Felder (must/ideal/supplement/forbid).\n\n" + "**Pagination:** limit/offset. Feld `count` entspricht der Anzahl zurückgegebener Items (keine Gesamtsumme)." + ), +) +def list_plan_templates( + discipline: Optional[str] = Query(None, description="Filter: Disziplin (exaktes KEYWORD-Match)", example="Karate"), + age_group: Optional[str] = Query(None, description="Filter: Altersgruppe", example="Teenager"), + target_group: Optional[str] = Query(None, description="Filter: Zielgruppe", example="Breitensport"), + section: Optional[str] = Query(None, description="Filter: Section-Name (materialisiert)", example="Warmup"), + goal: Optional[str] = Query(None, description="Filter: Trainingsziel", example="Technik"), + keyword: Optional[str] = Query(None, description="Filter: Keyword in must/ideal/supplement/forbid", example="Koordination"), + limit: int = Query(20, ge=1, le=200, description="Max. Anzahl Items"), + offset: int = Query(0, ge=0, description="Start-Offset für Paging"), +): + _ensure_collection(PLAN_TEMPLATE_COLLECTION) + must: List[Any] = [] + should: List[Any] = [] + if discipline: + must.append(FieldCondition(key="discipline", match=MatchValue(value=discipline))) + if age_group: + must.append(FieldCondition(key="age_group", match=MatchValue(value=age_group))) + if target_group: + must.append(FieldCondition(key="target_group", match=MatchValue(value=target_group))) + if section: + must.append(FieldCondition(key="section_names", match=MatchValue(value=section))) + if goal: + must.append(FieldCondition(key="goals", match=MatchValue(value=goal))) + if keyword: + for k in ( + "section_must_keywords", + "section_ideal_keywords", + "section_supplement_keywords", + "section_forbid_keywords", + ): + should.append(FieldCondition(key=k, match=MatchValue(value=keyword))) + + flt = None + if must or should: + flt = Filter(must=must or None, should=should or None) + + fetch_n = max(offset + limit, 1) + pts, _ = qdrant.scroll(collection_name=PLAN_TEMPLATE_COLLECTION, scroll_filter=flt, limit=fetch_n, with_payload=True) + items: List[PlanTemplate] = [] + for p in pts[offset:offset+limit]: + payload = dict(p.payload or {}) + payload.setdefault("id", str(p.id)) + items.append(_as_model(PlanTemplate, payload)) + return PlanTemplateList(items=items, limit=limit, offset=offset, count=len(items)) + +# ----------------- +# Endpoints: Pläne +# ----------------- +@router.post( + "/plan", + response_model=Plan, + summary="Create a concrete training plan", + description=( + "Erstellt einen konkreten Trainingsplan.\n\n" + "Idempotenz: gleicher Fingerprint (title + items) → gleicher Plan (kein Duplikat).\n" + "Optional: Validierung von template_id und Exercises (Strict-Mode)." + ), +) +def create_plan(p: Plan): + _ensure_collection(PLAN_COLLECTION) + + # Template-Referenz prüfen (falls gesetzt) + if p.template_id: + if not _exists_in_collection(PLAN_TEMPLATE_COLLECTION, "id", p.template_id): + raise HTTPException(status_code=422, detail=f"Unknown template_id: {p.template_id}") + + # Optional: Strict-Mode – Exercises gegen EXERCISE_COLLECTION prüfen + if _truthy(os.getenv("PLAN_STRICT_EXERCISES")): + missing: List[str] = [] + for sec in p.sections or []: + for it in sec.items or []: + exid = (it.exercise_external_id or "").strip() + if exid and not _exists_in_collection(EXERCISE_COLLECTION, "external_id", exid): + missing.append(exid) + if missing: + raise HTTPException(status_code=422, detail={"error": "unknown exercise_external_id", "missing": sorted(set(missing))}) + + # Fingerprint + Idempotenz + fp = _fingerprint_for_plan(p) + p.fingerprint = p.fingerprint or fp + existing = _get_by_field(PLAN_COLLECTION, "fingerprint", p.fingerprint) + if existing: + return _as_model(Plan, existing) + + # Normalisieren + Materialisierung + p.goals = _norm_list(p.goals) + payload = p.model_dump() + if isinstance(payload.get("created_at"), datetime): + payload["created_at"] = payload["created_at"].astimezone(timezone.utc).isoformat() + # Materialisierte Section-Namen für robuste Filter/Indizes + try: + payload["plan_section_names"] = _norm_list([s.get("name", "") for s in (payload.get("sections") or [])]) + except Exception: + payload["plan_section_names"] = _norm_list([s.name for s in (p.sections or [])]) + + vec = _embed(_plan_embed_text(p)) + qdrant.upsert(collection_name=PLAN_COLLECTION, points=[PointStruct(id=str(p.id), vector=vec, payload=payload)]) + return p + +@router.get( + "/plan/{plan_id}", + response_model=Plan, + summary="Read a training plan by id", + description="Liest einen Plan anhand seiner ID. `created_at` wird (falls ISO-String) zu `datetime` geparst.", +) +def get_plan(plan_id: str): + _ensure_collection(PLAN_COLLECTION) + found = _get_by_field(PLAN_COLLECTION, "id", plan_id) + if not found: + raise HTTPException(status_code=404, detail="not found") + if isinstance(found.get("created_at"), str): + try: + found["created_at"] = datetime.fromisoformat(found["created_at"]) + except Exception: + pass + return _as_model(Plan, found) + +@router.get( + "/plans", + response_model=PlanList, + summary="List training plans (filterable)", + description=( + "Listet Trainingspläne mit Filtern.\n\n" + "**Filter** (exakte Matches, KEYWORD-Felder):\n" + "- created_by, discipline, age_group, target_group, goal\n" + "- section: Section-Name (nutzt materialisiertes `plan_section_names`)\n" + "- created_from / created_to: ISO-8601 Zeitfenster (lokal ausgewertet).\n\n" + "**Pagination:** limit/offset. Feld `count` entspricht der Anzahl zurückgegebener Items (keine Gesamtsumme)." + ), +) +def list_plans( + created_by: Optional[str] = Query(None, description="Filter: Ersteller", example="tester"), + discipline: Optional[str] = Query(None, description="Filter: Disziplin", example="Karate"), + age_group: Optional[str] = Query(None, description="Filter: Altersgruppe", example="Teenager"), + target_group: Optional[str] = Query(None, description="Filter: Zielgruppe", example="Breitensport"), + goal: Optional[str] = Query(None, description="Filter: Trainingsziel", example="Technik"), + section: Optional[str] = Query(None, description="Filter: Section-Name", example="Warmup"), + created_from: Optional[str] = Query(None, description="Ab-Zeitpunkt (ISO 8601, z. B. 2025-08-12T00:00:00Z)", example="2025-08-12T00:00:00Z"), + created_to: Optional[str] = Query(None, description="Bis-Zeitpunkt (ISO 8601)", example="2025-08-13T00:00:00Z"), + limit: int = Query(20, ge=1, le=200, description="Max. Anzahl Items"), + offset: int = Query(0, ge=0, description="Start-Offset für Paging"), +): + _ensure_collection(PLAN_COLLECTION) + must: List[Any] = [] + if created_by: + must.append(FieldCondition(key="created_by", match=MatchValue(value=created_by))) + if discipline: + must.append(FieldCondition(key="discipline", match=MatchValue(value=discipline))) + if age_group: + must.append(FieldCondition(key="age_group", match=MatchValue(value=age_group))) + if target_group: + must.append(FieldCondition(key="target_group", match=MatchValue(value=target_group))) + if goal: + must.append(FieldCondition(key="goals", match=MatchValue(value=goal))) + if section: + must.append(FieldCondition(key="plan_section_names", match=MatchValue(value=section))) + + flt = Filter(must=must or None) if must else None + + fetch_n = max(offset + limit, 1) + pts, _ = qdrant.scroll(collection_name=PLAN_COLLECTION, scroll_filter=flt, limit=fetch_n, with_payload=True) + + # optionales Zeitfenster lokal anwenden + def _in_window(py: Dict[str, Any]) -> bool: + if not (created_from or created_to): + return True + ts = py.get("created_at") + if isinstance(ts, dict) and ts.get("$date"): + ts = ts["$date"] + if isinstance(ts, str): + try: + dt = datetime.fromisoformat(ts.replace("Z", "+00:00")) + except Exception: + return False + elif isinstance(ts, datetime): + dt = ts + else: + return False + ok = True + if created_from: + try: + ok = ok and dt >= datetime.fromisoformat(created_from.replace("Z", "+00:00")) + except Exception: + pass + if created_to: + try: + ok = ok and dt <= datetime.fromisoformat(created_to.replace("Z", "+00:00")) + except Exception: + pass + return ok + + payloads: List[Dict[str, Any]] = [] + for p in pts: + py = dict(p.payload or {}) + py.setdefault("id", str(p.id)) + if _in_window(py): + payloads.append(py) + + sliced = payloads[offset:offset+limit] + items = [_as_model(Plan, x) for x in sliced] + return PlanList(items=items, limit=limit, offset=offset, count=len(items))