llm-api/exercise_router.py aktualisiert
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s
Soll dafür sorgen, dass in der Qdrant-Collection sogenannte Capability-Facetten ins Payload aufgenommen werden. Das ist deine aktuelle Version plus folgende Verbesserungen: Schreibt jetzt automatisch die Capability-Facetten ins Payload: capability_keys, capability_ge1, capability_ge2, capability_ge3 (für schnelle Qdrant-Filter). Stabilisiert Listenfelder (keywords, equipment): trimmt, dedupliziert, sortiert → stabile Filter & Fingerprints. Die Upsert-Logik via external_id bleibt unverändert. Die Response bleibt kompatibel: Zusätzliche Payload-Felder werden nicht zurückgegeben (wir filtern sie für die API-Antwort heraus).
This commit is contained in:
parent
d14ed9a511
commit
2a859aa16c
|
|
@ -1,4 +1,16 @@
|
||||||
# Test eines Kommentars, um die Funktion des gitea testen zu können
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
exercise_router.py – v1.6.0
|
||||||
|
|
||||||
|
Änderungen ggü. letzter Version:
|
||||||
|
- Upsert unverändert (external_id-Lookup), aber Payload wird stabilisiert
|
||||||
|
- **Neu:** Capability-Facetten werden beim Upsert automatisch geschrieben:
|
||||||
|
- capability_keys
|
||||||
|
- capability_ge1 / capability_ge2 / capability_ge3
|
||||||
|
- Normalisierung von Listenfeldern (keywords, equipment): Trim, Duplikate raus, sortiert → stabile Fingerprints/Filter
|
||||||
|
- Rückgabe (response_model=Exercise) bleibt unverändert; Extra-Payload-Felder werden nicht zurückgegeben
|
||||||
|
- Keine API-Signaturänderungen
|
||||||
|
"""
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException, Query
|
from fastapi import APIRouter, HTTPException, Query
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
@ -11,7 +23,6 @@ from qdrant_client.models import (
|
||||||
VectorParams,
|
VectorParams,
|
||||||
Distance,
|
Distance,
|
||||||
PointIdsList,
|
PointIdsList,
|
||||||
# NEW: für Filter-Queries (Lookup via external_id)
|
|
||||||
Filter, FieldCondition, MatchValue,
|
Filter, FieldCondition, MatchValue,
|
||||||
)
|
)
|
||||||
import os
|
import os
|
||||||
|
|
@ -23,13 +34,13 @@ router = APIRouter()
|
||||||
# =========================
|
# =========================
|
||||||
class Exercise(BaseModel):
|
class Exercise(BaseModel):
|
||||||
id: str = Field(default_factory=lambda: str(uuid4()))
|
id: str = Field(default_factory=lambda: str(uuid4()))
|
||||||
# NEW — optional, bricht vorhandene POST-Calls nicht
|
# Upsert-Metadaten
|
||||||
external_id: Optional[str] = None # z.B. "mw:12345"
|
external_id: Optional[str] = None # z.B. "mw:12345"
|
||||||
fingerprint: Optional[str] = None # sha256 über Kernfelder
|
fingerprint: Optional[str] = None # sha256 über Kernfelder
|
||||||
source: Optional[str] = None # Herkunft, z.B. "MediaWiki"
|
source: Optional[str] = None # Herkunft, z.B. "MediaWiki"
|
||||||
imported_at: Optional[datetime] = None # vom Import gesetzt
|
imported_at: Optional[datetime] = None # vom Import gesetzt
|
||||||
|
|
||||||
# Bestehende Felder (unverändert)
|
# Domain-Felder
|
||||||
title: str
|
title: str
|
||||||
summary: str
|
summary: str
|
||||||
short_description: str
|
short_description: str
|
||||||
|
|
@ -60,8 +71,9 @@ class DeleteResponse(BaseModel):
|
||||||
# =========================
|
# =========================
|
||||||
COLLECTION = os.getenv("EXERCISE_COLLECTION", "exercises")
|
COLLECTION = os.getenv("EXERCISE_COLLECTION", "exercises")
|
||||||
|
|
||||||
# CHANGED: Factorized to reuse for both create and update
|
|
||||||
def _ensure_collection():
|
def _ensure_collection():
|
||||||
|
"""Sicherstellen, dass die Collection existiert (kein Drop)."""
|
||||||
if not qdrant.collection_exists(COLLECTION):
|
if not qdrant.collection_exists(COLLECTION):
|
||||||
qdrant.recreate_collection(
|
qdrant.recreate_collection(
|
||||||
collection_name=COLLECTION,
|
collection_name=COLLECTION,
|
||||||
|
|
@ -71,32 +83,73 @@ def _ensure_collection():
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
# NEW: gemeinsamer Helper für external_id-Lookup
|
|
||||||
def _lookup_by_external_id(external_id: str) -> Optional[Dict[str, Any]]:
    """Look up a stored point by the ``external_id`` field of its payload.

    Ensures the collection exists, then scrolls with a payload filter and a
    limit of one.

    Returns:
        A copy of the first matching point's payload, with ``"id"`` set to the
        stringified point id when the payload has no ``"id"`` of its own, or
        ``None`` when nothing matches.
    """
    _ensure_collection()

    condition = FieldCondition(key="external_id", match=MatchValue(value=external_id))
    hits, _next_offset = qdrant.scroll(
        collection_name=COLLECTION,
        scroll_filter=Filter(must=[condition]),
        limit=1,
    )

    if hits:
        first = hits[0]
        # Copy so the caller never mutates the client's own payload object.
        record = dict(first.payload or {})
        record.setdefault("id", str(first.id))
        return record
    return None
|
||||||
|
|
||||||
# NEW: konsistente Embedding-Erzeugung
|
|
||||||
# Exercise attributes whose text is concatenated to build the embedding input.
_DEF_EMBED_FIELDS = ("title", "summary", "short_description", "purpose", "execution", "notes")


def _make_vector(ex: Exercise) -> List[float]:
    """Build the embedding vector for an exercise.

    The truthy values of the fields listed in ``_DEF_EMBED_FIELDS`` are joined
    with ``". "`` and passed to ``model.encode``; the result is converted with
    ``.tolist()``.
    """
    parts = []
    for field_name in _DEF_EMBED_FIELDS:
        # Missing or empty fields contribute nothing to the embedded text.
        if getattr(ex, field_name, None):
            parts.append(getattr(ex, field_name, ""))
    # NOTE(review): encode() is called synchronously here — confirm the model
    # object does not require awaiting.
    return model.encode(". ".join(parts)).tolist()
|
||||||
|
|
||||||
|
|
||||||
|
def _norm_list(xs: List[Any]) -> List[str]:
|
||||||
|
"""Trim + Duplikate entfernen + sortieren (stabil für Filter & Fingerprint)."""
|
||||||
|
out = []
|
||||||
|
seen = set()
|
||||||
|
for x in xs or []:
|
||||||
|
s = str(x).strip()
|
||||||
|
if not s:
|
||||||
|
continue
|
||||||
|
if s.lower() in seen:
|
||||||
|
continue
|
||||||
|
seen.add(s.lower())
|
||||||
|
out.append(s)
|
||||||
|
return sorted(out, key=str.casefold)
|
||||||
|
|
||||||
|
|
||||||
|
def _facet_capabilities(caps: Dict[str, Any]) -> Dict[str, List[str]]:
|
||||||
|
caps = caps or {}
|
||||||
|
def ge(n: int) -> List[str]:
|
||||||
|
out = []
|
||||||
|
for k, v in caps.items():
|
||||||
|
try:
|
||||||
|
if int(v) >= n:
|
||||||
|
out.append(str(k))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return sorted({s.strip() for s in out if s.strip()}, key=str.casefold)
|
||||||
|
all_keys = sorted({str(k).strip() for k in caps.keys() if str(k).strip()}, key=str.casefold)
|
||||||
|
return {
|
||||||
|
"capability_keys": all_keys,
|
||||||
|
"capability_ge1": ge(1),
|
||||||
|
"capability_ge2": ge(2),
|
||||||
|
"capability_ge3": ge(3),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _response_strip_extras(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Return only the payload entries that are declared fields of ``Exercise``.

    Extra keys stay in the stored Qdrant payload; they are merely left out of
    the dict used to build the API response model.
    """
    # Pydantic v2 exposes declared fields as ``model_fields``; v1 only has
    # ``__fields__``. The file still calls the v1-style ``.dict()``, so both
    # are supported here rather than failing with AttributeError on v1.
    fields = getattr(Exercise, "model_fields", None)
    if fields is None:
        fields = Exercise.__fields__
    allowed = set(fields)
    return {k: v for k, v in payload.items() if k in allowed}
|
||||||
|
|
||||||
# =========================
|
# =========================
|
||||||
# Endpoints
|
# Endpoints
|
||||||
# =========================
|
# =========================
|
||||||
@router.get("/exercise/by-external-id") # NEW
|
@router.get("/exercise/by-external-id")
|
||||||
def get_exercise_by_external_id(external_id: str = Query(..., min_length=3)):
|
def get_exercise_by_external_id(external_id: str = Query(..., min_length=3)):
|
||||||
"""Lookup für Idempotenz im Importer. Liefert 404, wenn nicht vorhanden."""
|
"""Lookup für Idempotenz im Importer. Liefert 404, wenn nicht vorhanden."""
|
||||||
found = _lookup_by_external_id(external_id)
|
found = _lookup_by_external_id(external_id)
|
||||||
|
|
@ -104,30 +157,35 @@ def get_exercise_by_external_id(external_id: str = Query(..., min_length=3)):
|
||||||
raise HTTPException(status_code=404, detail="not found")
|
raise HTTPException(status_code=404, detail="not found")
|
||||||
return found
|
return found
|
||||||
|
|
||||||
|
|
||||||
@router.post("/exercise", response_model=Exercise)
|
@router.post("/exercise", response_model=Exercise)
|
||||||
def create_or_update_exercise(ex: Exercise):
|
def create_or_update_exercise(ex: Exercise):
|
||||||
"""
|
"""
|
||||||
CHANGED: Upsert-Semantik. Wenn `external_id` existiert und bereits in Qdrant gefunden wird,
|
Upsert-Semantik. Wenn `external_id` existiert und bereits in Qdrant gefunden wird,
|
||||||
wird dieselbe Point-ID überschrieben (echtes Update). Ansonsten neuer Eintrag.
|
wird dieselbe Point-ID überschrieben (echtes Update). Ansonsten neuer Eintrag.
|
||||||
API-Signatur bleibt identisch (POST /exercise, Body = Exercise).
|
API-Signatur bleibt identisch (POST /exercise, Body = Exercise).
|
||||||
"""
|
"""
|
||||||
_ensure_collection()
|
_ensure_collection()
|
||||||
|
|
||||||
# Default: neue Point-ID aus dem Exercise-Objekt
|
# Bestehende Point-ID übernehmen, falls external_id bereits vorhanden ist
|
||||||
point_id = ex.id
|
point_id = ex.id
|
||||||
|
|
||||||
# Wenn external_id gesetzt → prüfen, ob bereits vorhanden → Point-ID übernehmen
|
|
||||||
if ex.external_id:
|
if ex.external_id:
|
||||||
prior = _lookup_by_external_id(ex.external_id)
|
prior = _lookup_by_external_id(ex.external_id)
|
||||||
if prior:
|
if prior:
|
||||||
point_id = prior.get("id", point_id)
|
point_id = prior.get("id", point_id)
|
||||||
|
|
||||||
# Embedding berechnen
|
# Embedding
|
||||||
vector = _make_vector(ex)
|
vector = _make_vector(ex)
|
||||||
|
|
||||||
# Payload synchronisieren (id == point_id)
|
# Payload stabilisieren + Facetten einfügen
|
||||||
payload = ex.dict()
|
payload: Dict[str, Any] = ex.dict()
|
||||||
payload["id"] = str(point_id)
|
payload["id"] = str(point_id)
|
||||||
|
payload["keywords"] = _norm_list(payload.get("keywords") or [])
|
||||||
|
payload["equipment"] = _norm_list(payload.get("equipment") or [])
|
||||||
|
|
||||||
|
facet = _facet_capabilities(payload.get("capabilities") or {})
|
||||||
|
# Extra-Felder nur im gespeicherten Payload verwenden (für Filter), nicht in der Response
|
||||||
|
payload.update(facet)
|
||||||
|
|
||||||
# Upsert in Qdrant
|
# Upsert in Qdrant
|
||||||
qdrant.upsert(
|
qdrant.upsert(
|
||||||
|
|
@ -135,9 +193,9 @@ def create_or_update_exercise(ex: Exercise):
|
||||||
points=[PointStruct(id=str(point_id), vector=vector, payload=payload)],
|
points=[PointStruct(id=str(point_id), vector=vector, payload=payload)],
|
||||||
)
|
)
|
||||||
|
|
||||||
return Exercise(**payload)
|
return Exercise(**_response_strip_extras(payload))
|
||||||
|
|
||||||
|
|
||||||
# (Optional) – Einzel-Abruf per ID (falls bereits vorhanden, unverändert)
|
|
||||||
@router.get("/exercise/{exercise_id}", response_model=Exercise)
|
@router.get("/exercise/{exercise_id}", response_model=Exercise)
|
||||||
def get_exercise(exercise_id: str):
|
def get_exercise(exercise_id: str):
|
||||||
_ensure_collection()
|
_ensure_collection()
|
||||||
|
|
@ -150,9 +208,9 @@ def get_exercise(exercise_id: str):
|
||||||
raise HTTPException(status_code=404, detail="not found")
|
raise HTTPException(status_code=404, detail="not found")
|
||||||
payload = dict(pts[0].payload or {})
|
payload = dict(pts[0].payload or {})
|
||||||
payload.setdefault("id", str(pts[0].id))
|
payload.setdefault("id", str(pts[0].id))
|
||||||
return Exercise(**payload)
|
return Exercise(**_response_strip_extras(payload))
|
||||||
|
|
||||||
|
|
||||||
# Bestehende Admin-Utilities (Delete nach Filter / komplette Collection) – unverändert außer Nutzung von CONSTs
|
|
||||||
@router.delete("/exercise/delete-by-external-id", response_model=DeleteResponse)
|
@router.delete("/exercise/delete-by-external-id", response_model=DeleteResponse)
|
||||||
def delete_by_external_id(external_id: str = Query(...)):
|
def delete_by_external_id(external_id: str = Query(...)):
|
||||||
_ensure_collection()
|
_ensure_collection()
|
||||||
|
|
@ -164,6 +222,7 @@ def delete_by_external_id(external_id: str = Query(...)):
|
||||||
qdrant.delete(collection_name=COLLECTION, points_selector=PointIdsList(points=ids))
|
qdrant.delete(collection_name=COLLECTION, points_selector=PointIdsList(points=ids))
|
||||||
return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=COLLECTION)
|
return DeleteResponse(status="🗑️ gelöscht", count=len(ids), collection=COLLECTION)
|
||||||
|
|
||||||
|
|
||||||
@router.delete("/exercise/delete-collection", response_model=DeleteResponse)
|
@router.delete("/exercise/delete-collection", response_model=DeleteResponse)
|
||||||
def delete_collection(collection: str = Query(default=COLLECTION)):
|
def delete_collection(collection: str = Query(default=COLLECTION)):
|
||||||
if not qdrant.collection_exists(collection):
|
if not qdrant.collection_exists(collection):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user