From 380b361e7060d183c05ddf2be1bea022fc4061f9 Mon Sep 17 00:00:00 2001 From: Lars Date: Mon, 11 Aug 2025 18:59:27 +0200 Subject: [PATCH] llm-api/exercise_router.py aktualisiert --- llm-api/exercise_router.py | 47 ++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/llm-api/exercise_router.py b/llm-api/exercise_router.py index 3364741..c52d48f 100644 --- a/llm-api/exercise_router.py +++ b/llm-api/exercise_router.py @@ -1,15 +1,13 @@ # -*- coding: utf-8 -*- """ -exercise_router.py – v1.6.0 +exercise_router.py – v1.6.1 -Änderungen ggü. letzter Version: -- Upsert unverändert (external_id-Lookup), aber Payload wird stabilisiert -- **Neu:** Capability-Facetten werden beim Upsert automatisch geschrieben: - - capability_keys - - capability_ge1 / capability_ge2 / capability_ge3 -- Normalisierung von Listenfeldern (keywords, equipment): Trim, Duplikate raus, sortiert → stabile Fingerprints/Filter -- Rückgabe (response_model=Exercise) bleibt unverändert; Extra-Payload-Felder werden nicht zurückgegeben -- Keine API-Signaturänderungen +Änderungen ggü. v1.6.0: +- **Idempotenz-Fix:** Qdrant-Scroll liefert nun explizit den Payload (`WithPayloadSelector(enable=True)`) + für `/exercise/by-external-id` und `/exercise/{id}`. Dadurch kann der Importer den gespeicherten + Fingerprint korrekt gegen den Recalc-Hash prüfen (keine Phantom-Updates mehr). +- Capability-Facetten & Listen-Normalisierung wie in v1.6.0. +- Keine API-Signaturänderungen. """ from fastapi import APIRouter, HTTPException, Query @@ -23,7 +21,10 @@ from qdrant_client.models import ( VectorParams, Distance, PointIdsList, - Filter, FieldCondition, MatchValue, + Filter, + FieldCondition, + MatchValue, + WithPayloadSelector, ) import os @@ -38,7 +39,7 @@ class Exercise(BaseModel): external_id: Optional[str] = None # z.B. "mw:12345" fingerprint: Optional[str] = None # sha256 über Kernfelder source: Optional[str] = None # Herkunft, z.B. "MediaWiki" - imported_at: Optional[datetime] = None # vom Import gesetzt + imported_at: Optional[datetime] = None # vom Import gesetzt (ISO-String wird akzeptiert) # Domain-Felder title: str @@ -85,10 +86,15 @@ def _ensure_collection(): def _lookup_by_external_id(external_id: str) -> Optional[Dict[str, Any]]: - """Einfaches Lookup via Payload-Filter.""" + """Lookup via Payload-Filter. Liefert die gespeicherte Payload (mit allen Feldern).""" _ensure_collection() flt = Filter(must=[FieldCondition(key="external_id", match=MatchValue(value=external_id))]) - pts, _ = qdrant.scroll(collection_name=COLLECTION, scroll_filter=flt, limit=1) + pts, _ = qdrant.scroll( + collection_name=COLLECTION, + scroll_filter=flt, + limit=1, + with_payload=WithPayloadSelector(enable=True), + ) if not pts: return None doc = dict(pts[0].payload or {}) @@ -113,9 +119,10 @@ def _norm_list(xs: List[Any]) -> List[str]: s = str(x).strip() if not s: continue - if s.lower() in seen: + key = s.casefold() + if key in seen: continue - seen.add(s.lower()) + seen.add(key) out.append(s) return sorted(out, key=str.casefold) @@ -141,9 +148,8 @@ def _facet_capabilities(caps: Dict[str, Any]) -> Dict[str, List[str]]: def _response_strip_extras(payload: Dict[str, Any]) -> Dict[str, Any]: - """Nur Felder zurückgeben, die im Pydantic-Modell existieren (Extra-Felder bleiben im Qdrant-Payload, werden aber nicht serialisiert).""" - # Pydantic v2: model_fields enthält erlaubte Keys - allowed = set(Exercise.model_fields.keys()) + """Nur Felder zurückgeben, die im Pydantic-Modell existieren (Extra-Felder bleiben im Qdrant-Payload).""" + allowed = set(Exercise.model_fields.keys()) # Pydantic v2 return {k: v for k, v in payload.items() if k in allowed} # ========================= @@ -178,7 +184,7 @@ def create_or_update_exercise(ex: Exercise): vector = _make_vector(ex) # Payload stabilisieren + Facetten einfügen - payload: Dict[str, Any] = ex.dict() + payload: Dict[str, Any] = ex.model_dump() payload["id"] = str(point_id) payload["keywords"] = _norm_list(payload.get("keywords") or []) payload["equipment"] = _norm_list(payload.get("equipment") or []) @@ -203,6 +209,7 @@ def get_exercise(exercise_id: str): collection_name=COLLECTION, scroll_filter=Filter(must=[FieldCondition(key="id", match=MatchValue(value=exercise_id))]), limit=1, + with_payload=WithPayloadSelector(enable=True), ) if not pts: raise HTTPException(status_code=404, detail="not found") @@ -215,7 +222,7 @@ def get_exercise(exercise_id: str): def delete_by_external_id(external_id: str = Query(...)): _ensure_collection() flt = Filter(must=[FieldCondition(key="external_id", match=MatchValue(value=external_id))]) - pts, _ = qdrant.scroll(collection_name=COLLECTION, scroll_filter=flt, limit=10000) + pts, _ = qdrant.scroll(collection_name=COLLECTION, scroll_filter=flt, limit=10000, with_payload=WithPayloadSelector(enable=False)) ids = [str(p.id) for p in pts] if not ids: return DeleteResponse(status="🔍 Keine Einträge gefunden.", count=0, collection=COLLECTION)