From 9be69ace5c125c3c41244e1adca2cc375a62d50c Mon Sep 17 00:00:00 2001 From: Lars Date: Fri, 22 May 2026 09:59:56 +0200 Subject: [PATCH] Enhance exercise_ai module with skill input sanitization and version update - Introduced a new constant `_MAX_SANITIZE_SKILL_INPUT_ROWS` to limit the number of skill entries processed, improving performance and preventing issues with excessively long skill arrays. - Updated the `_extract_json_array` and `_sanitize_skill_entries` functions to enforce this limit, ensuring that only a maximum of 250 skill entries are handled and that processing stops after 5 valid entries. - Incremented the application version to 0.8.155 and updated the changelog to reflect these changes, including a note on the improvements made to the AI endpoint for skill arrays. --- backend/exercise_ai.py | 18 +++++++++++++++--- backend/version.py | 11 +++++++++-- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/backend/exercise_ai.py b/backend/exercise_ai.py index 255ad4c..eb48e2f 100644 --- a/backend/exercise_ai.py +++ b/backend/exercise_ai.py @@ -34,6 +34,7 @@ _TOKEN_FIND = re.compile(r"[a-zäöüß0-9]+", re.IGNORECASE) _MAX_PLAIN_FIELD = 28_000 _MAX_SKILLS_CATALOG_LINES = 240 _MAX_SUMMARY_CHARS = 220 +_MAX_SANITIZE_SKILL_INPUT_ROWS = 250 _FALLBACK_RETRIEVAL_CONFIG: Dict[str, Any] = { "version": 1, @@ -514,23 +515,34 @@ def _extract_json_array(text: str) -> Any: end = s.rfind("]") if end > 0: s = s[: end + 1] - return json.loads(s) + parsed = json.loads(s) + if isinstance(parsed, list) and len(parsed) > _MAX_SANITIZE_SKILL_INPUT_ROWS: + parsed = parsed[:_MAX_SANITIZE_SKILL_INPUT_ROWS] + return parsed if s.startswith("{"): obj = json.loads(s) if isinstance(obj, dict): for k in ("skills", "items", "data"): v = obj.get(k) if isinstance(v, list): + if len(v) > _MAX_SANITIZE_SKILL_INPUT_ROWS: + return v[:_MAX_SANITIZE_SKILL_INPUT_ROWS] return v raise ValueError("JSON-Objekt ohne Skills-Liste") - return json.loads(s) + parsed_end = json.loads(s) + if isinstance(parsed_end, list) and len(parsed_end) > _MAX_SANITIZE_SKILL_INPUT_ROWS: + return parsed_end[:_MAX_SANITIZE_SKILL_INPUT_ROWS] + return parsed_end def _sanitize_skill_entries(cur, rows: Any) -> List[Dict[str, Any]]: if not isinstance(rows, list): return [] out: List[Dict[str, Any]] = [] - for raw in rows: + cap = rows[:_MAX_SANITIZE_SKILL_INPUT_ROWS] + for raw in cap: + if len(out) >= 5: + break if not isinstance(raw, dict): continue sid = raw.get("skill_id") diff --git a/backend/version.py b/backend/version.py index eb40771..4407d29 100644 --- a/backend/version.py +++ b/backend/version.py @@ -1,6 +1,6 @@ # Shinkan Jinkendo Version Information -APP_VERSION = "0.8.154" +APP_VERSION = "0.8.155" BUILD_DATE = "2026-05-29" DB_SCHEMA_VERSION = "20260529068" @@ -23,7 +23,7 @@ MODULE_VERSIONS = { "skills": "0.1.1", # DB 065 karate_relevance + relevance_level; CRUD unterstützt Felder "skill_profiles": "1.0.0", # Phase 3: gewichtetes Fähigkeiten-Profil + skill-discovery/suggestions "methods": "0.1.0", - "exercises": "2.30.0", # Migration 068 ai_skill_retrieval_profiles; suggest focus_areas_context; exercise_ai Kontext-Katalog + Gewichtungen + "exercises": "2.30.1", # exercise_ai: Skills-JSON max. 250 Eintraege, Sanitize bricht nach 5 gueltigen ab (Performance) "training_units": "0.4.0", # POST .../publish-to-framework: Ablauf aus geplanter Einheit → Rahmen-Slot-Blueprint "training_programs": "0.1.0", "planning": "0.15.0", # Vorlagen: Strukturvorschau, Bearbeiten inkl. Split-Sessions + Beschreibung @@ -38,6 +38,13 @@ MODULE_VERSIONS = { } CHANGELOG = [ + { + "version": "0.8.155", + "date": "2026-05-29", + "changes": [ + "exercise_ai: Fix haengender KI-Endpunkt bei sehr langen Skill-Arrays vom Modell (Cap + frueher Abbruch nach 5 gueltigen Zeilen)", + ], + }, { "version": "0.8.154", "date": "2026-05-29",