From 249f1aeea07535698fc7f6099cc86168b3000d54 Mon Sep 17 00:00:00 2001 From: Lars Date: Wed, 13 Aug 2025 11:47:18 +0200 Subject: [PATCH] llm-api/plan_router.py aktualisiert --- llm-api/plan_router.py | 136 ++++++++++++++++++++++++----------------- 1 file changed, 79 insertions(+), 57 deletions(-) diff --git a/llm-api/plan_router.py b/llm-api/plan_router.py index bb6f101..a37b218 100644 --- a/llm-api/plan_router.py +++ b/llm-api/plan_router.py @@ -1,13 +1,12 @@ # -*- coding: utf-8 -*- """ -plan_router.py – v0.13.3 (WP-15) +plan_router.py – v0.13.4 (WP-15) -Minimal-CRUD + List/Filter für Templates & Pläne. - -Änderungen ggü. v0.13.2 -- /plans: Mehrseitiges Scrollen, bis mindestens offset+limit Treffer eingesammelt sind. -- Stabilisiert Zeitfenster-Filter in großen Collections; verhindert leere Resultate, - wenn gesuchte Items nicht auf der ersten Scroll-Seite liegen. +Änderungen ggü. v0.13.3 +- Idempotenter POST /plan: Wenn ein Plan mit gleichem Fingerprint existiert und die neue + Anfrage ein späteres `created_at` trägt, wird der gespeicherte Plan mit dem neueren + `created_at` und `created_at_ts` aktualisiert (kein Duplikat, aber zeitlich „frisch“). +- /plans: Mehrseitiges Scrollen bleibt aktiv; Zeitfenster-Filter robust (serverseitig + Fallback). """ from fastapi import APIRouter, HTTPException, Query from pydantic import BaseModel, Field @@ -41,8 +40,8 @@ class TemplateSection(BaseModel): name: str target_minutes: int must_keywords: List[str] = [] - ideal_keywords: List[str] = [] # wünschenswert - supplement_keywords: List[str] = [] # ergänzend + ideal_keywords: List[str] = [] + supplement_keywords: List[str] = [] forbid_keywords: List[str] = [] capability_targets: Dict[str, int] = {} @@ -147,14 +146,15 @@ def _fingerprint_for_plan(p: Plan) -> str: raw = json.dumps(core, sort_keys=True, ensure_ascii=False) return hashlib.sha256(raw.encode("utf-8")).hexdigest() -def _get_by_field(collection: str, key: str, value: Any) -> Optional[Dict[str, Any]]: +def _get_by_field(collection: str, key: str, value: Any): flt = Filter(must=[FieldCondition(key=key, match=MatchValue(value=value))]) pts, _ = qdrant.scroll(collection_name=collection, scroll_filter=flt, limit=1, with_payload=True) if not pts: return None - payload = dict(pts[0].payload or {}) - payload.setdefault("id", str(pts[0].id)) - return payload + point = pts[0] + payload = dict(point.payload or {}) + payload.setdefault("id", str(point.id)) + return {"id": point.id, "payload": payload} def _as_model(model_cls, payload: Dict[str, Any]): fields = getattr(model_cls, "model_fields", None) or getattr(model_cls, "__fields__", {}) @@ -170,8 +170,13 @@ def _exists_in_collection(collection: str, key: str, value: Any) -> bool: pts, _ = qdrant.scroll(collection_name=collection, scroll_filter=flt, limit=1, with_payload=False) return bool(pts) +def _parse_iso_to_ts(iso_str: str) -> float: + try: + return float(datetime.fromisoformat(iso_str.replace("Z", "+00:00")).timestamp()) + except Exception: + return float(datetime.now(timezone.utc).timestamp()) + def _scroll_collect(collection: str, flt: Optional[Filter], need: int, page: int = 256): - """Scrollt mehrere Seiten und sammelt mind. `need` Punkte ein (oder bis keine mehr kommen).""" out = [] offset = None page = max(1, min(page, 1024)) @@ -231,7 +236,7 @@ def get_plan_template(tpl_id: str): found = _get_by_field(PLAN_TEMPLATE_COLLECTION, "id", tpl_id) if not found: raise HTTPException(status_code=404, detail="not found") - return _as_model(PlanTemplate, found) + return _as_model(PlanTemplate, found["payload"]) @router.get( "/plan_templates", @@ -271,18 +276,10 @@ def list_plan_templates( if goal: must.append(FieldCondition(key="goals", match=MatchValue(value=goal))) if keyword: - for k in ( - "section_must_keywords", - "section_ideal_keywords", - "section_supplement_keywords", - "section_forbid_keywords", - ): + for k in ("section_must_keywords","section_ideal_keywords","section_supplement_keywords","section_forbid_keywords"): should.append(FieldCondition(key=k, match=MatchValue(value=keyword))) - flt = None - if must or should: - flt = Filter(must=must or None, should=should or None) - + flt = Filter(must=must or None, should=should or None) if (must or should) else None need = max(offset + limit, 1) pts = _scroll_collect(PLAN_TEMPLATE_COLLECTION, flt, need) items: List[PlanTemplate] = [] @@ -302,7 +299,7 @@ def list_plan_templates( description=( "Erstellt einen konkreten Trainingsplan.\n\n" "Idempotenz: gleicher Fingerprint (title + items) → gleicher Plan (kein Duplikat).\n" - "Optional: Validierung von template_id und Exercises (Strict-Mode)." + "Bei erneutem POST mit späterem `created_at` wird `created_at`/`created_at_ts` des bestehenden Plans aktualisiert." ), ) def create_plan(p: Plan): @@ -324,19 +321,13 @@ def create_plan(p: Plan): if missing: raise HTTPException(status_code=422, detail={"error": "unknown exercise_external_id", "missing": sorted(set(missing))}) - # Fingerprint + Idempotenz + # Fingerprint fp = _fingerprint_for_plan(p) p.fingerprint = p.fingerprint or fp - existing = _get_by_field(PLAN_COLLECTION, "fingerprint", p.fingerprint) - if existing: - return _as_model(Plan, existing) - # Normalisieren + Materialisierung - p.goals = _norm_list(p.goals) - payload = p.model_dump() - - # created_at → ISO + numerischer Zeitstempel (FLOAT) - dt = payload.get("created_at") + # Ziel-ISO + TS aus Request berechnen (auch wenn Duplikat) + req_payload = p.model_dump() + dt = req_payload.get("created_at") if isinstance(dt, datetime): dt = dt.astimezone(timezone.utc).isoformat() elif isinstance(dt, str): @@ -346,22 +337,53 @@ def create_plan(p: Plan): dt = datetime.now(timezone.utc).isoformat() else: dt = datetime.now(timezone.utc).isoformat() - payload["created_at"] = dt - try: - ts = datetime.fromisoformat(dt.replace("Z", "+00:00")).timestamp() - except Exception: - ts = datetime.now(timezone.utc).timestamp() - payload["created_at_ts"] = float(ts) + req_payload["created_at"] = dt + req_ts = _parse_iso_to_ts(dt) + req_payload["created_at_ts"] = float(req_ts) - # Materialisierte Section-Namen (robuste Filter/Indizes) - try: - payload["plan_section_names"] = _norm_list([ - (s.get("name") or "").strip() for s in (payload.get("sections") or []) if isinstance(s, dict) - ]) - except Exception: - payload["plan_section_names"] = _norm_list([ - (getattr(s, "name", "") or "").strip() for s in (p.sections or []) - ]) + # Dup-Check + existing = _get_by_field(PLAN_COLLECTION, "fingerprint", p.fingerprint) + if existing: + # Falls neues created_at später ist → gespeicherten Plan aktualisieren + cur = existing["payload"] + cur_ts = cur.get("created_at_ts") + if cur_ts is None: + cur_ts = _parse_iso_to_ts(str(cur.get("created_at", dt))) + if req_ts > float(cur_ts): + try: + qdrant.set_payload( + collection_name=PLAN_COLLECTION, + payload={"created_at": req_payload["created_at"], "created_at_ts": req_payload["created_at_ts"]}, + points=[existing["id"]], + ) + # Antwort-Objekt aktualisieren + cur["created_at"] = req_payload["created_at"] + cur["created_at_ts"] = req_payload["created_at_ts"] + except Exception: + pass + return _as_model(Plan, cur) + + # Neu anlegen + p.goals = _norm_list(p.goals) + payload = req_payload # enthält bereits korrektes created_at + created_at_ts + payload.update({ + "id": p.id, + "template_id": p.template_id, + "title": p.title, + "discipline": p.discipline, + "age_group": p.age_group, + "target_group": p.target_group, + "total_minutes": p.total_minutes, + "sections": [s.model_dump() for s in p.sections], + "goals": _norm_list(p.goals), + "capability_summary": p.capability_summary, + "novelty_against_last_n": p.novelty_against_last_n, + "fingerprint": p.fingerprint, + "created_by": p.created_by, + "source": p.source, + }) + # Section-Namen materialisieren + payload["plan_section_names"] = _norm_list([ (s.get("name") or "").strip() for s in (payload.get("sections") or []) if isinstance(s, dict) ]) vec = _embed(_plan_embed_text(p)) qdrant.upsert(collection_name=PLAN_COLLECTION, points=[PointStruct(id=str(p.id), vector=vec, payload=payload)]) @@ -378,12 +400,13 @@ def get_plan(plan_id: str): found = _get_by_field(PLAN_COLLECTION, "id", plan_id) if not found: raise HTTPException(status_code=404, detail="not found") - if isinstance(found.get("created_at"), str): + payload = found["payload"] + if isinstance(payload.get("created_at"), str): try: - found["created_at"] = datetime.fromisoformat(found["created_at"]) + payload["created_at"] = datetime.fromisoformat(payload["created_at"]) except Exception: pass - return _as_model(Plan, found) + return _as_model(Plan, payload) @router.get( "/plans", @@ -457,13 +480,12 @@ def list_plans( def _in_window(py: Dict[str, Any]) -> bool: if not (created_from or created_to): return True - # Wenn serverseitig Range aktiv war und Treffer kamen, brauchen wir keinen lokalen Check if applied_server_range and not fallback_local_time_check: - return True + return True # serverseitig bereits gefiltert ts = py.get("created_at") if isinstance(ts, dict) and ts.get("$date"): ts = ts["$date"] - if isinstance(ts, (int, float)) and py.get("created_at_ts") is not None: + if isinstance(py.get("created_at_ts"), (int, float)): dt = datetime.fromtimestamp(float(py["created_at_ts"]), tz=timezone.utc) elif isinstance(ts, str): try: