Enhance exercise_ai module with skill input sanitization and version update
All checks were successful
Deploy Development / deploy (push) Successful in 42s
Test Suite / pytest-backend (push) Successful in 38s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 12s
Test Suite / k6 /health Baseline (push) Successful in 33s
Test Suite / playwright-tests (push) Successful in 1m16s

- Introduced a new constant `_MAX_SANITIZE_SKILL_INPUT_ROWS` (250) that caps the number of skill entries processed, so that excessively long skill arrays returned by the model no longer cause the AI endpoint to hang.
- Updated `_extract_json_array` to truncate any parsed skills list to at most 250 entries, and `_sanitize_skill_entries` to examine at most 250 input rows and to stop as soon as 5 valid entries have been collected.
- Incremented the application version to 0.8.155 and the `exercises` module version to 2.30.1, and added a changelog entry describing the fix for the AI endpoint hanging on very long skill arrays.
This commit is contained in:
Lars 2026-05-22 09:59:56 +02:00
parent 286c36e9d7
commit 9be69ace5c
2 changed files with 24 additions and 5 deletions

View File

@ -34,6 +34,7 @@ _TOKEN_FIND = re.compile(r"[a-zäöüß0-9]+", re.IGNORECASE)
_MAX_PLAIN_FIELD = 28_000 _MAX_PLAIN_FIELD = 28_000
_MAX_SKILLS_CATALOG_LINES = 240 _MAX_SKILLS_CATALOG_LINES = 240
_MAX_SUMMARY_CHARS = 220 _MAX_SUMMARY_CHARS = 220
_MAX_SANITIZE_SKILL_INPUT_ROWS = 250
_FALLBACK_RETRIEVAL_CONFIG: Dict[str, Any] = { _FALLBACK_RETRIEVAL_CONFIG: Dict[str, Any] = {
"version": 1, "version": 1,
@ -514,23 +515,34 @@ def _extract_json_array(text: str) -> Any:
end = s.rfind("]") end = s.rfind("]")
if end > 0: if end > 0:
s = s[: end + 1] s = s[: end + 1]
return json.loads(s) parsed = json.loads(s)
if isinstance(parsed, list) and len(parsed) > _MAX_SANITIZE_SKILL_INPUT_ROWS:
parsed = parsed[:_MAX_SANITIZE_SKILL_INPUT_ROWS]
return parsed
if s.startswith("{"): if s.startswith("{"):
obj = json.loads(s) obj = json.loads(s)
if isinstance(obj, dict): if isinstance(obj, dict):
for k in ("skills", "items", "data"): for k in ("skills", "items", "data"):
v = obj.get(k) v = obj.get(k)
if isinstance(v, list): if isinstance(v, list):
if len(v) > _MAX_SANITIZE_SKILL_INPUT_ROWS:
return v[:_MAX_SANITIZE_SKILL_INPUT_ROWS]
return v return v
raise ValueError("JSON-Objekt ohne Skills-Liste") raise ValueError("JSON-Objekt ohne Skills-Liste")
return json.loads(s) parsed_end = json.loads(s)
if isinstance(parsed_end, list) and len(parsed_end) > _MAX_SANITIZE_SKILL_INPUT_ROWS:
return parsed_end[:_MAX_SANITIZE_SKILL_INPUT_ROWS]
return parsed_end
def _sanitize_skill_entries(cur, rows: Any) -> List[Dict[str, Any]]: def _sanitize_skill_entries(cur, rows: Any) -> List[Dict[str, Any]]:
if not isinstance(rows, list): if not isinstance(rows, list):
return [] return []
out: List[Dict[str, Any]] = [] out: List[Dict[str, Any]] = []
for raw in rows: cap = rows[:_MAX_SANITIZE_SKILL_INPUT_ROWS]
for raw in cap:
if len(out) >= 5:
break
if not isinstance(raw, dict): if not isinstance(raw, dict):
continue continue
sid = raw.get("skill_id") sid = raw.get("skill_id")

View File

@ -1,6 +1,6 @@
# Shinkan Jinkendo Version Information # Shinkan Jinkendo Version Information
APP_VERSION = "0.8.154" APP_VERSION = "0.8.155"
BUILD_DATE = "2026-05-29" BUILD_DATE = "2026-05-29"
DB_SCHEMA_VERSION = "20260529068" DB_SCHEMA_VERSION = "20260529068"
@ -23,7 +23,7 @@ MODULE_VERSIONS = {
"skills": "0.1.1", # DB 065 karate_relevance + relevance_level; CRUD unterstützt Felder "skills": "0.1.1", # DB 065 karate_relevance + relevance_level; CRUD unterstützt Felder
"skill_profiles": "1.0.0", # Phase 3: gewichtetes Fähigkeiten-Profil + skill-discovery/suggestions "skill_profiles": "1.0.0", # Phase 3: gewichtetes Fähigkeiten-Profil + skill-discovery/suggestions
"methods": "0.1.0", "methods": "0.1.0",
"exercises": "2.30.0", # Migration 068 ai_skill_retrieval_profiles; suggest focus_areas_context; exercise_ai Kontext-Katalog + Gewichtungen "exercises": "2.30.1", # exercise_ai: Skills-JSON max. 250 Eintraege, Sanitize bricht nach 5 gueltigen ab (Performance)
"training_units": "0.4.0", # POST .../publish-to-framework: Ablauf aus geplanter Einheit → Rahmen-Slot-Blueprint "training_units": "0.4.0", # POST .../publish-to-framework: Ablauf aus geplanter Einheit → Rahmen-Slot-Blueprint
"training_programs": "0.1.0", "training_programs": "0.1.0",
"planning": "0.15.0", # Vorlagen: Strukturvorschau, Bearbeiten inkl. Split-Sessions + Beschreibung "planning": "0.15.0", # Vorlagen: Strukturvorschau, Bearbeiten inkl. Split-Sessions + Beschreibung
@ -38,6 +38,13 @@ MODULE_VERSIONS = {
} }
CHANGELOG = [ CHANGELOG = [
{
"version": "0.8.155",
"date": "2026-05-29",
"changes": [
"exercise_ai: Fix haengender KI-Endpunkt bei sehr langen Skill-Arrays vom Modell (Cap + frueher Abbruch nach 5 gueltigen Zeilen)",
],
},
{ {
"version": "0.8.154", "version": "0.8.154",
"date": "2026-05-29", "date": "2026-05-29",