shinkan-jinkendo/backend/exercise_ai.py
Lars 9f4678f418
All checks were successful
Deploy Development / deploy (push) Successful in 44s
Test Suite / pytest-backend (push) Successful in 40s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 14s
Test Suite / k6 /health Baseline (push) Successful in 36s
Test Suite / playwright-tests (push) Successful in 1m16s
Implement exercise_instruction_rewrite for AI Prompt System
- Added `exercise_instruction_rewrite` functionality to enhance AI-generated instructions, incorporating fields for goal, execution, preparation, and trainer notes.
- Updated `ExerciseFormAiPromptContext` to include new fields and methods for instruction handling.
- Enhanced the `run_exercise_form_ai_suggestion` function to support instruction rewriting and validation.
- Modified API endpoints and frontend components to integrate instruction features, including a new button for AI instruction revision.
- Incremented application version to 0.8.163 and updated changelog to reflect these changes, including migration details and new functionality.
2026-05-22 18:53:36 +02:00

1106 lines
37 KiB
Python

"""
KI-Vorschlaege fuer Uebungsformular: Laedt Prompts aus ai_prompts, ruft OpenRouter auf.
Keine persistente Aenderung an exercises — nur Response-DTO fuer das Frontend.
Skill-Katalog fuer Prompts: priorisierte Auswahl (ai_skill_retrieval_profiles, Fallback-Heuristik).
"""
from __future__ import annotations
import copy
import html
import json
import logging
import math
import os
import re
from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Sequence, Tuple
from fastapi import HTTPException
from openrouter_chat import (
OpenRouterError,
default_openrouter_model_id,
effective_openrouter_model_for_prompt_row,
normalize_openrouter_env,
openrouter_chat_completion,
)
from ai_prompt_context import ExerciseFormAiPromptContext
from ai_prompt_runtime import AiPromptUnavailableError, load_and_render_ai_prompt
from exercise_rich_text import collect_inline_exercise_media_ids, normalize_inline_exercise_media_markup
# Module logger; the AI_DEBUG diagnostics in this file are written through it.
_LOGGER = logging.getLogger("shinkan.exercise_ai")
def _ai_debug_on() -> bool:
return os.getenv("SHINKAN_AI_DEBUG", "").strip().lower() in ("1", "true", "yes", "full")
# Level slugs that _normalize_exercise_skill_level accepts unchanged.
_CANONICAL_SKILL_LEVELS = frozenset({"basis", "grundlagen", "aufbau", "fortgeschritten", "optimierung"})
# Alternative spellings (older names and "1".."5") mapped onto canonical level slugs.
_LEGACY_SKILL_LEVEL_SLUG = {
    "einsteiger": "basis",
    "experte": "optimierung",
    "1": "basis",
    "2": "grundlagen",
    "3": "aufbau",
    "4": "fortgeschritten",
    "5": "optimierung",
}
# Intensity values that _normalize_exercise_skill_intensity returns as-is.
_ALLOWED_SKILL_INTENSITY = frozenset({"niedrig", "mittel", "hoch"})
# Matches any "<...>" tag; used to blank out markup when producing plain text.
_TAG_RE = re.compile(r"<[^>]+>", re.IGNORECASE)
# Matches runs of ASCII letters, German umlauts/ß and digits (case-insensitive).
_TOKEN_FIND = re.compile(r"[a-zäöüß0-9]+", re.IGNORECASE)
# Default maximum length of the plain text returned by strip_html_to_plain.
_MAX_PLAIN_FIELD = 28_000
# Upper bound on the number of skills placed into the prompt catalogue.
_MAX_SKILLS_CATALOG_LINES = 240
# Summary texts longer than this are truncated.
_MAX_SUMMARY_CHARS = 220
# At most this many parsed skill candidates are looked at.
_MAX_SANITIZE_SKILL_INPUT_ROWS = 250
# Retrieval settings used when no profile can be loaded; also the base that
# _merge_retrieval_configs copies before applying loaded profiles.
_FALLBACK_RETRIEVAL_CONFIG: Dict[str, Any] = {
    "version": 1,
    "importance_multiplier": 1.0,
    "text_overlap_bonus": 2.0,
    "main_slug_weights": {"karate": 1.0, "allgemeine": 1.0},
    "category_slug_weights": {},
    "category_max_share": {"kondition": 0.38, "koordination": 0.35},
    "main_min_share": {},
    "description_plain_max_len": 160,
    "karate_relevance_max_len": 72,
    "keyword_overrides": [],
}
def _normalize_exercise_skill_level(value) -> Optional[str]:
if value is None:
return None
s = str(value).strip().lower()
if not s:
return None
if s in _CANONICAL_SKILL_LEVELS:
return s
return _LEGACY_SKILL_LEVEL_SLUG.get(s)
def _normalize_exercise_skill_intensity(value) -> str:
if value is None:
return "mittel"
key = str(value).strip().lower()
if key in ("low",):
return "niedrig"
if key in ("medium",):
return "mittel"
if key in ("high",):
return "hoch"
if key in _ALLOWED_SKILL_INTENSITY:
return key
return "mittel"
def strip_html_to_plain(html: Optional[str], *, max_len: int = _MAX_PLAIN_FIELD) -> str:
    """Reduce an HTML fragment to single-spaced plain text of bounded length.

    Every ``<...>`` tag is replaced by a space, whitespace runs are collapsed,
    and the result is trimmed. Text longer than ``max_len`` is cut and ends in
    an ellipsis so that the returned string never exceeds ``max_len``
    characters. HTML entities are left as they are.

    Args:
        html: Markup to flatten; ``None`` or empty input yields ``""``.
        max_len: Maximum length of the returned text. Values below 1 yield ``""``.

    Returns:
        The flattened, possibly truncated text.
    """
    if not html:
        return ""
    text = _TAG_RE.sub(" ", str(html))
    text = re.sub(r"\s+", " ", text).strip()
    if max_len <= 0:
        # A non-positive limit would otherwise turn into a negative slice
        # index and return almost the whole text.
        return ""
    if len(text) > max_len:
        # One character is reserved for the ellipsis marker.
        text = text[: max_len - 1].rstrip() + "\u2026"
    return text
def _corpus_tokens(*parts: str) -> frozenset:
    """Collect the distinct lower-cased word tokens found in the given texts.

    Blank or empty parts are skipped; tokens of a single character are dropped.
    """
    pieces = [part.strip() for part in parts if part and part.strip()]
    haystack = " ".join(pieces)
    tokens = set()
    for match in _TOKEN_FIND.finditer(haystack):
        word = match.group(0).lower()
        if len(word) > 1:
            tokens.add(word)
    return frozenset(tokens)
def _ai_profiles_table_ready(cur) -> bool:
    """Check via ``to_regclass`` whether ``public.ai_skill_retrieval_profiles`` exists.

    Works with both dict-style and sequence-style result rows.
    """
    cur.execute("SELECT to_regclass(%s)::text AS t", ("public.ai_skill_retrieval_profiles",))
    record = cur.fetchone()
    if record is None:
        return False
    regclass = record["t"] if isinstance(record, dict) else record[0]
    if regclass is None:
        return False
    return str(regclass).strip() != ""
def _average_float_dict(dicts: Sequence[Mapping[str, Any]], *, fallback: float) -> Dict[str, float]:
keys: set = set()
for d in dicts:
keys |= set(d.keys())
out: Dict[str, float] = {}
for k in keys:
vals = []
for d in dicts:
if k not in d or d[k] is None:
continue
try:
vals.append(float(d[k]))
except (TypeError, ValueError):
continue
out[k] = (sum(vals) / len(vals)) if vals else fallback
return out
def _merge_retrieval_configs(configs: Sequence[Dict[str, Any]]) -> Dict[str, Any]:
    """Merge several retrieval profiles into one effective configuration.

    Starts from a deep copy of the fallback configuration. With at least one
    profile, the four weight/share dictionaries are replaced by per-key
    averages, the scalar settings by the mean of all usable values (integer
    settings are rounded), and ``keyword_overrides`` by the concatenation of
    all profiles' override lists. ``main_slug_weights`` always contains
    ``karate`` and ``allgemeine``.

    Each scalar setting is collected independently, so one unusable value in
    a profile no longer discards that profile's other settings.

    Args:
        configs: Profile dictionaries; an empty sequence yields the fallback.

    Returns:
        A new configuration dict.
    """
    merged = copy.deepcopy(_FALLBACK_RETRIEVAL_CONFIG)
    if not configs:
        return merged

    for field, fallback in (
        ("main_slug_weights", 1.0),
        ("category_slug_weights", 1.0),
        ("category_max_share", 1.0),
        ("main_min_share", 0.0),
    ):
        merged[field] = _average_float_dict(
            [c.get(field) or {} for c in configs],
            fallback=fallback,
        )
    for slug in ("karate", "allgemeine"):
        merged["main_slug_weights"].setdefault(slug, 1.0)

    def _usable_values(field: str, cast) -> List[Any]:
        """Return the convertible values of ``field`` across all profiles."""
        values: List[Any] = []
        for c in configs:
            raw = c.get(field)
            if raw is None:
                continue
            try:
                values.append(cast(raw))
            except (TypeError, ValueError, OverflowError):
                continue
        return values

    for field in ("importance_multiplier", "text_overlap_bonus"):
        values = _usable_values(field, float)
        if values:
            merged[field] = sum(values) / len(values)
    for field in ("description_plain_max_len", "karate_relevance_max_len"):
        values = _usable_values(field, int)
        if values:
            merged[field] = int(round(sum(values) / len(values)))

    overrides: List[Any] = []
    for c in configs:
        raw_overrides = c.get("keyword_overrides")
        # Only real sequences are concatenated; extending with e.g. a string
        # would add its characters as bogus overrides.
        if isinstance(raw_overrides, (list, tuple)):
            overrides.extend(raw_overrides)
    merged["keyword_overrides"] = overrides
    return merged
def _mul_weight_dict(target: MutableMapping[str, float], patch: Mapping[str, Any]) -> None:
for k, v in patch.items():
try:
mul = float(v)
except (TypeError, ValueError):
continue
target[k] = float(target.get(k, 1.0)) * mul
def _apply_keyword_overrides(cfg: Dict[str, Any], corpus_lower: str) -> None:
caps = cfg.setdefault("category_max_share", {})
for ov in cfg.get("keyword_overrides") or []:
keys_any = ov.get("keywords_any") or []
if not keys_any or not corpus_lower.strip():
continue
hay = corpus_lower.lower() if corpus_lower else ""
hit = False
for kw in keys_any:
ks = str(kw or "").strip()
if not ks:
continue
ks_l = ks.lower()
hit = ks_l in hay
if hit:
break
if not hit:
continue
patch = ov.get("patch") or {}
_mul_weight_dict(cfg.setdefault("category_slug_weights", {}), patch.get("category_slug_weights") or {})
_mul_weight_dict(cfg.setdefault("main_slug_weights", {}), patch.get("main_slug_weights") or {})
for slug, mx in (patch.get("category_max_share") or {}).items():
try:
mx_f = float(mx)
except (TypeError, ValueError):
continue
cur = float(caps.get(slug, 1.0))
caps[slug] = min(cur, mx_f)
def _ordered_focus_ids(focus_ctx: Optional[Sequence[Tuple[int, bool]]]) -> List[int]:
"""Primär zuerst, dann stabil nach ID."""
if not focus_ctx:
return []
seen = set()
ordered: List[Tuple[int, bool]] = []
for fid, isp in sorted(focus_ctx, key=lambda x: (not x[1], x[0])):
try:
i = int(fid)
except (TypeError, ValueError):
continue
if i < 1 or i in seen:
continue
seen.add(i)
ordered.append((i, bool(isp)))
return [fid for fid, _ in ordered]
def _load_merged_retrieval_config(
    cur, focus_ctx: Optional[Sequence[Tuple[int, bool]]]
) -> Dict[str, Any]:
    """Load the retrieval profiles for the given focus areas and merge them.

    Returns a copy of the fallback configuration when the profile table is
    missing. Otherwise one active profile per focus area is read (in the
    order given by ``_ordered_focus_ids``); if none is found, the active
    default profile is tried. Whatever was loaded is merged.
    """
    if not _ai_profiles_table_ready(cur):
        return copy.deepcopy(_FALLBACK_RETRIEVAL_CONFIG)

    def _config_of(record) -> Optional[Dict[str, Any]]:
        """Extract the config dict from a row; None if absent or undecodable."""
        if not record:
            return None
        payload = record["config"] if isinstance(record, dict) else record[0]
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except json.JSONDecodeError:
                return None
        return payload if isinstance(payload, dict) else None

    by_focus_sql = """
SELECT config
FROM ai_skill_retrieval_profiles
WHERE active = true AND focus_area_id = %s
LIMIT 1
"""
    default_sql = """
SELECT config
FROM ai_skill_retrieval_profiles
WHERE active = true AND is_default = true
LIMIT 1
"""
    profiles: List[Dict[str, Any]] = []
    for focus_id in _ordered_focus_ids(focus_ctx):
        cur.execute(by_focus_sql, (focus_id,))
        profile = _config_of(cur.fetchone())
        if profile is not None:
            profiles.append(profile)
    if not profiles:
        cur.execute(default_sql)
        profile = _config_of(cur.fetchone())
        if profile is not None:
            profiles.append(profile)
    return _merge_retrieval_configs(profiles)
def _fetch_all_active_skills_for_catalog(cur) -> List[Dict[str, Any]]:
    """Fetch every skill whose status is NULL or ``active`` as plain dicts.

    Each row carries the skill columns plus the main-category slug, the
    category slug (both ``''`` when not joined) and the sub-category name.
    """
    query = """
SELECT s.id,
s.name,
s.category,
s.description,
s.karate_relevance,
s.relevance_level,
s.importance,
COALESCE(m.slug, '') AS main_slug,
COALESCE(c.slug, '') AS category_slug,
c.name AS subcategory_name
FROM skills s
LEFT JOIN skill_main_categories m ON m.id = s.main_category_id
LEFT JOIN skill_categories c ON c.id = s.category_id
WHERE (s.status IS NULL OR s.status = 'active')
"""
    cur.execute(query)
    records = cur.fetchall()
    return [dict(record) for record in records]
def _score_skill_row(
    row: Mapping[str, Any],
    cfg: Mapping[str, Any],
    corpus_tokens: frozenset,
) -> float:
    """Compute the retrieval score of one skill row.

    The base score is ``importance * importance_multiplier * main weight *
    category weight`` (importance clamped to 1..5 and defaulting to 3, each
    weight floored at 0.05). For every corpus token that occurs as a
    substring of the skill's searchable text, ``text_overlap_bonus`` is added.
    """
    main_slug = str(row.get("main_slug") or "").strip().lower()
    cat_slug = str(row.get("category_slug") or "").strip().lower()
    main_weights = cfg.get("main_slug_weights") or {}
    category_weights = cfg.get("category_slug_weights") or {}
    main_w = float(main_weights.get(main_slug, 1.0))
    cat_w = float(category_weights.get(cat_slug, 1.0))

    raw_importance = row.get("importance")
    if raw_importance is None:
        importance = 3
    else:
        try:
            importance = int(raw_importance)
        except (TypeError, ValueError):
            importance = 3
    importance = min(5, max(1, importance))

    multiplier = float(cfg.get("importance_multiplier") or 1.0)
    base = float(importance) * multiplier * max(main_w, 0.05) * max(cat_w, 0.05)

    searchable = (
        strip_html_to_plain(row.get("name"), max_len=400),
        strip_html_to_plain(row.get("description"), max_len=520),
        cat_slug.replace("_", " "),
        str(row.get("category") or ""),
        str(row.get("subcategory_name") or ""),
    )
    blob = " ".join(searchable).lower()
    overlaps = 0
    for token in corpus_tokens:
        if token and token in blob:
            overlaps += 1
    bonus = float(cfg.get("text_overlap_bonus") or 0.0)
    return base + overlaps * bonus
def _category_cap_limits(cfg: Mapping[str, Any], n_max: int) -> Dict[str, int]:
out: Dict[str, int] = {}
mx = cfg.get("category_max_share") or {}
if not isinstance(mx, dict):
return out
for slug, raw in mx.items():
ks = str(slug or "").strip()
if not ks:
continue
try:
sh = float(raw)
except (TypeError, ValueError):
continue
if 0 < sh < 1.0:
out[ks] = max(1, int(math.floor(sh * n_max)))
elif sh >= 1.0:
out[ks] = n_max + 99999
else:
continue
return out
def _pick_catalog_rows(rows_scored: List[Tuple[float, Dict[str, Any]]], cfg: Mapping[str, Any]) -> List[Dict[str, Any]]:
    """Select the catalogue rows from ``(score, row_dict)`` pairs.

    The input does not need to be sorted. Rows are ranked by descending score
    (ties by name). A first pass takes rows while honouring the per-category
    caps; if the catalogue is still not full, a second pass fills it with the
    best remaining rows regardless of caps.
    """
    limit = _MAX_SKILLS_CATALOG_LINES
    caps = _category_cap_limits(cfg, limit)
    ranked = sorted(rows_scored, key=lambda pair: (-pair[0], str(pair[1].get("name") or "")))
    chosen: List[Dict[str, Any]] = []
    chosen_ids: set = set()
    per_category: Dict[str, int] = {}

    # Pass 1: respect the category caps.
    for _score, row in ranked:
        if len(chosen) >= limit:
            break
        row_id = row["id"]
        if row_id in chosen_ids:
            continue
        slug = str(row.get("category_slug") or "").strip().lower()
        if slug and slug in caps and per_category.get(slug, 0) >= caps[slug]:
            continue
        chosen.append(row)
        chosen_ids.add(row_id)
        if slug:
            per_category[slug] = per_category.get(slug, 0) + 1

    # Pass 2: top up with the best rows not yet taken.
    for _score, row in ranked:
        if len(chosen) >= limit:
            break
        row_id = row["id"]
        if row_id in chosen_ids:
            continue
        chosen.append(row)
        chosen_ids.add(row_id)

    return chosen[:limit]
def _format_skill_catalog_line(row: Mapping[str, Any], cfg: Mapping[str, Any]) -> str:
    """Render one skill as a single ``- id=… | name=… | …`` catalogue line.

    The description is cut to ``description_plain_max_len`` (kept within
    40..400). The karate-relevance part is appended only when
    ``karate_relevance_max_len`` is positive and either the relevance text or
    the relevance level is present.
    """
    rid = int(row["id"])
    nm = (row.get("name") or "").strip()
    if not nm:
        nm = f"Skill #{rid}"

    main_slug = str(row.get("main_slug") or "").strip()
    category_bits = (
        main_slug.upper(),
        str(row.get("category") or "").strip(),
        str(row.get("subcategory_name") or "").strip(),
    )
    cats = " / ".join(bit for bit in category_bits if bit)

    description_limit = int(cfg.get("description_plain_max_len") or 160)
    description_limit = max(40, min(400, description_limit))
    dsc = strip_html_to_plain(row.get("description"), max_len=description_limit)

    krmax = int(cfg.get("karate_relevance_max_len") or 0)
    kr = ""
    if krmax > 0:
        kr = strip_html_to_plain(row.get("karate_relevance"), max_len=min(280, krmax))

    level = row.get("relevance_level")
    rel_s = "" if level is None else str(level).strip()

    line = f"- id={rid} | name={nm}"
    line += f" | kategorie={cats or '-'}"
    line += f" | beschreibung={dsc or '-'}"
    if krmax > 0 and (kr.strip() or rel_s):
        line += f" | karate_relevanz={kr or '-'} | relevanz_stufe={rel_s or '-'}"
    return line
def _safe_int_importance(value: Any) -> int:
try:
iv = int(value)
except (TypeError, ValueError):
return 0
return max(1, min(5, iv)) if iv else 0
def build_contextual_skills_catalog_block(
    cur,
    *,
    title: Optional[str],
    goal_plain: str,
    execution_plain: str,
    focus_hint: Optional[str],
    focus_ctx: Optional[Sequence[Tuple[int, bool]]],
) -> str:
    """Build the skill-catalogue text inserted into the skill-suggestion prompt.

    Loads the merged retrieval configuration, applies keyword overrides based
    on the exercise texts, scores all active skills, picks the catalogue rows
    and renders one line per skill, ordered by descending importance and then
    by name. Returns a placeholder sentence when no skill was picked.
    """
    config = _load_merged_retrieval_config(cur, focus_ctx)
    title_text = title or ""
    hint_text = focus_hint or ""
    corpus = " ".join([title_text, goal_plain or "", execution_plain or "", hint_text]).lower()
    _apply_keyword_overrides(config, corpus)
    tokens = _corpus_tokens(title_text, goal_plain, execution_plain, hint_text)

    candidates: List[Tuple[float, Dict[str, Any]]] = [
        (_score_skill_row(skill, config, tokens), skill)
        for skill in _fetch_all_active_skills_for_catalog(cur)
    ]

    def _display_order(skill: Mapping[str, Any]):
        return (
            -_safe_int_importance(skill.get("importance")),
            str(skill.get("name") or "").lower(),
        )

    selection = sorted(_pick_catalog_rows(candidates, config), key=_display_order)
    if not selection:
        return "(keine aktiven Skills im Katalog)"
    return "\n".join(_format_skill_catalog_line(skill, config) for skill in selection)
# Maximum plain-text length per rewritten instruction field.
_MAX_INSTRUCTION_GOAL_PLAIN = 4_000
_MAX_INSTRUCTION_EXECUTION_PLAIN = 12_000
_MAX_INSTRUCTION_PREP_PLAIN = 2_500
_MAX_INSTRUCTION_TRAINER_PLAIN = 2_500
# Keys read from the model's JSON answer for the instruction rewrite.
_INSTRUCTION_JSON_KEYS = ("goal", "execution", "preparation", "trainer_notes")
# Lookup of the plain-text limit for each of the keys above.
_INSTRUCTION_FIELD_MAX_PLAIN = {
    "goal": _MAX_INSTRUCTION_GOAL_PLAIN,
    "execution": _MAX_INSTRUCTION_EXECUTION_PLAIN,
    "preparation": _MAX_INSTRUCTION_PREP_PLAIN,
    "trainer_notes": _MAX_INSTRUCTION_TRAINER_PLAIN,
}
# Matches any opening or closing tag whose name is NOT one of
# p, ul, ol, li, strong, b, em, i, br, span (case-insensitive).
# NOTE(review): attributes on the allowed tags are not touched by this
# pattern — confirm they are sanitised elsewhere.
_DISALLOWED_HTML_TAG_RE = re.compile(
    r"</?\s*(?!p\b|ul\b|ol\b|li\b|strong\b|b\b|em\b|i\b|br\b|span\b)[a-zA-Z][^>]*>",
    re.IGNORECASE,
)
# Matches complete <script>…</script> and <style>…</style> elements including
# their content (case-insensitive, across line breaks).
_SCRIPT_STYLE_RE = re.compile(r"(?is)<(script|style)[^>]*>.*?</\1>")
def _plain_to_minimal_instruction_html(text: str) -> str:
raw = (text or "").strip()
if not raw:
return ""
parts = [p.strip() for p in re.split(r"\n+", raw) if p.strip()]
if not parts:
return ""
return "".join(f"<p>{html.escape(p)}</p>" for p in parts)
def _truncate_plain(text: str, max_len: int) -> str:
t = (text or "").strip()
if len(t) <= max_len:
return t
return t[: max_len - 1].rstrip() + ""
def _sanitize_instruction_field_html(raw: Any, *, max_plain: int) -> str:
if raw is None:
return ""
s = str(raw).strip()
if not s:
return ""
if s.startswith("```"):
s = re.sub(r"^```[a-zA-Z0-9]*\s*", "", s)
if s.endswith("```"):
s = s[:-3].strip()
s = _SCRIPT_STYLE_RE.sub("", s)
s = _DISALLOWED_HTML_TAG_RE.sub("", s)
if "<" not in s:
s = _plain_to_minimal_instruction_html(s)
else:
s = normalize_inline_exercise_media_markup(s) or ""
plain = strip_html_to_plain(s, max_len=max_plain + 200)
if len(plain) > max_plain:
plain = _truncate_plain(plain, max_plain)
s = _plain_to_minimal_instruction_html(plain)
return (normalize_inline_exercise_media_markup(s) or "").strip()
def _merge_preserved_inline_media(original: Optional[str], revised: str) -> str:
    """Append media references that exist in ``original`` but not in ``revised``.

    Missing references are added, sorted, as inline-media ``<span>`` elements
    inside one trailing ``<p>``. Without missing references the trimmed
    revised text is returned unchanged.
    """
    result = (revised or "").strip()
    original_ids = collect_inline_exercise_media_ids(original)
    if not original_ids:
        return result
    missing = sorted(original_ids - collect_inline_exercise_media_ids(result))
    if not missing:
        return result
    spans = [
        f'<span data-shinkan-exercise-media="{mid}" data-shinkan-exercise-media-size="medium" '
        f'class="shinkan-inline-media"></span>'
        for mid in missing
    ]
    block = f"<p>{''.join(spans)}</p>"
    if not result:
        return block
    return (result + block).strip()
def _first_balanced_json_object(text: str) -> Optional[str]:
i = text.find("{")
if i < 0:
return None
depth = 0
in_str = False
esc = False
for j in range(i, len(text)):
ch = text[j]
if in_str:
if esc:
esc = False
elif ch == "\\":
esc = True
elif ch == '"':
in_str = False
continue
if ch == '"':
in_str = True
continue
if ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
return text[i : j + 1]
return None
def _extract_instruction_rewrite_object(text: str) -> Dict[str, Any]:
s = (text or "").strip()
if not s:
raise ValueError("leer")
if s.startswith("```"):
s = re.sub(r"^```[a-zA-Z0-9]*\s*", "", s)
if s.endswith("```"):
s = s[:-3].strip()
frag = _first_balanced_json_object(s)
if frag:
s = frag
obj = json.loads(s)
if not isinstance(obj, dict):
raise ValueError("kein JSON-Objekt")
return obj
def _sanitize_instruction_rewrite_payload(
    parsed: Mapping[str, Any],
    *,
    originals: Mapping[str, Optional[str]],
) -> Dict[str, str]:
    """Sanitise all instruction fields of the parsed model answer.

    Each known field is cleaned with its own plain-text limit, then media
    references from the corresponding original text are re-attached. The
    result always contains every key of ``_INSTRUCTION_JSON_KEYS``.
    """
    cleaned: Dict[str, str] = {}
    for field in _INSTRUCTION_JSON_KEYS:
        limit = _INSTRUCTION_FIELD_MAX_PLAIN[field]
        markup = _sanitize_instruction_field_html(parsed.get(field), max_plain=limit)
        cleaned[field] = _merge_preserved_inline_media(originals.get(field), markup)
    return cleaned
def build_exercise_placeholder_variables(
    cur,
    *,
    slug: str,
    title: Optional[str],
    goal: Optional[str],
    execution: Optional[str],
    focus_area_hint: Optional[str],
    focus_areas_context: Optional[Sequence[Tuple[int, bool]]],
    preparation: Optional[str] = None,
    trainer_notes: Optional[str] = None,
) -> Dict[str, str]:
    """
    Build the ``{{placeholder}}`` variable map matching an exercise-AI prompt slug.

    ``pipeline`` gets an empty map, ``exercise_summary`` the title, focus
    area, goal and execution, ``exercise_instruction_rewrite`` all six text
    variables, and ``exercise_skill_suggestions`` additionally the
    ``skills_catalog``. Empty values are rendered as ``-``.

    Raises:
        ValueError: For any other slug.
    """
    slug_key = (slug or "").strip().lower()
    if slug_key == "pipeline":
        return {}

    goal_text = strip_html_to_plain(goal)
    execution_text = strip_html_to_plain(execution)
    preparation_text = strip_html_to_plain(preparation)
    notes_text = strip_html_to_plain(trainer_notes)
    title_text = (title or "").strip()
    focus_text = (focus_area_hint or "").strip()

    variables: Dict[str, str] = {
        "exercise_title": title_text or "-",
        "exercise_focus_area": focus_text or "-",
        "exercise_goal": goal_text or "-",
        "exercise_execution": execution_text or "-",
        "exercise_preparation": preparation_text or "-",
        "exercise_trainer_notes": notes_text or "-",
    }

    if slug_key == "exercise_instruction_rewrite":
        return variables
    if slug_key == "exercise_summary":
        summary_keys = ("exercise_title", "exercise_focus_area", "exercise_goal", "exercise_execution")
        return {name: variables[name] for name in summary_keys}
    if slug_key != "exercise_skill_suggestions":
        raise ValueError(f"Kein Platzhalter-Kontext fuer slug={slug!r} definiert.")

    variables["skills_catalog"] = build_contextual_skills_catalog_block(
        cur,
        title=title_text,
        goal_plain=goal_text,
        execution_plain=execution_text,
        focus_hint=focus_text or None,
        focus_ctx=focus_areas_context,
    )
    return variables
def _first_balanced_json_array(text: str) -> Optional[str]:
"""Findet das erste vollständig geschlossene Top-Level-JSON-Array in beliebigem Fließtext."""
i = text.find("[")
if i < 0:
return None
depth = 0
in_str = False
esc = False
for j in range(i, len(text)):
ch = text[j]
if in_str:
if esc:
esc = False
elif ch == "\\":
esc = True
elif ch == '"':
in_str = False
continue
if ch == '"':
in_str = True
continue
if ch == "[":
depth += 1
elif ch == "]":
depth -= 1
if depth == 0:
return text[i : j + 1]
return None
def _extract_json_array(text: str) -> Any:
s = text.strip()
if s.startswith("```"):
s = re.sub(r"^```[a-zA-Z0-9]*\s*", "", s)
if s.endswith("```"):
s = s[:-3].strip()
if s.startswith("["):
end = s.rfind("]")
if end > 0:
s = s[: end + 1]
parsed = json.loads(s)
if isinstance(parsed, list) and len(parsed) > _MAX_SANITIZE_SKILL_INPUT_ROWS:
parsed = parsed[:_MAX_SANITIZE_SKILL_INPUT_ROWS]
return parsed
if s.startswith("{"):
obj = json.loads(s)
if isinstance(obj, dict):
for k in ("skills", "items", "data"):
v = obj.get(k)
if isinstance(v, list):
if len(v) > _MAX_SANITIZE_SKILL_INPUT_ROWS:
return v[:_MAX_SANITIZE_SKILL_INPUT_ROWS]
return v
raise ValueError("JSON-Objekt ohne Skills-Liste")
parsed_end = json.loads(s)
if isinstance(parsed_end, list) and len(parsed_end) > _MAX_SANITIZE_SKILL_INPUT_ROWS:
return parsed_end[:_MAX_SANITIZE_SKILL_INPUT_ROWS]
return parsed_end
def _sanitize_skill_entries(cur, rows: Any) -> List[Dict[str, Any]]:
if not isinstance(rows, list):
return []
out: List[Dict[str, Any]] = []
cap = rows[:_MAX_SANITIZE_SKILL_INPUT_ROWS]
for raw in cap:
if len(out) >= 5:
break
if not isinstance(raw, dict):
continue
sid = raw.get("skill_id")
try:
skill_id = int(sid)
except (TypeError, ValueError):
continue
cur.execute(
"""
SELECT s.id, s.name, s.category,
sc.name AS subcategory_name
FROM skills s
LEFT JOIN skill_categories sc ON s.category_id = sc.id
WHERE s.id = %s AND (s.status IS NULL OR s.status = 'active')
""",
(skill_id,),
)
sk = cur.fetchone()
if not sk:
continue
req = _normalize_exercise_skill_level(raw.get("required_level")) or "grundlagen"
tgt = _normalize_exercise_skill_level(raw.get("target_level")) or req
if req not in _CANONICAL_SKILL_LEVELS:
req = _LEGACY_SKILL_LEVEL_SLUG.get(str(raw.get("required_level") or "").strip().lower(), "grundlagen")
if req not in _CANONICAL_SKILL_LEVELS:
req = "grundlagen"
if tgt not in _CANONICAL_SKILL_LEVELS:
tgt = _LEGACY_SKILL_LEVEL_SLUG.get(str(raw.get("target_level") or "").strip().lower(), req)
if tgt not in _CANONICAL_SKILL_LEVELS:
tgt = req
inten = _normalize_exercise_skill_intensity(raw.get("intensity"))
is_primary = bool(raw.get("is_primary")) if raw.get("is_primary") is not None else len(out) == 0
cat = (sk.get("category") or "").strip()
sub = (sk.get("subcategory_name") or "").strip()
skill_category = " / ".join(x for x in (cat, sub) if x) or (cat or None)
conf = raw.get("confidence")
try:
conf_f = float(conf) if conf is not None else None
except (TypeError, ValueError):
conf_f = None
item: Dict[str, Any] = {
"skill_id": skill_id,
"skill_name": (sk.get("name") or "").strip() or f"Skill #{skill_id}",
"required_level": req,
"target_level": tgt,
"intensity": inten,
"is_primary": is_primary,
}
if skill_category:
item["skill_category"] = skill_category
if conf_f is not None:
item["confidence"] = conf_f
out.append(item)
return out[:5]
def _require_openrouter_key() -> str:
    """Return the configured OpenRouter API key.

    Raises:
        HTTPException: 503 when no key is configured.
    """
    api_key, _unused = normalize_openrouter_env()
    if api_key:
        return api_key
    raise HTTPException(
        status_code=503,
        detail="KI nicht konfiguriert (OPENROUTER_API_KEY fehlt).",
    )
def run_exercise_ai_suggestion(
    cur,
    *,
    form_ctx: ExerciseFormAiPromptContext,
    want_summary: bool,
    want_skills: bool,
    want_instructions: bool = False,
) -> Dict[str, Any]:
    """Run the requested AI suggestions for the exercise form.

    Depending on the flags, up to three prompts are rendered and sent to
    OpenRouter, in this order: summary, skill suggestions, instruction
    rewrite. Nothing is written to the database by this function.

    Args:
        cur: Database cursor used for prompt and skill lookups.
        form_ctx: Current form contents (title, texts, focus areas).
        want_summary: Produce ``result["summary"]`` (text cut to
            ``_MAX_SUMMARY_CHARS`` characters, ending in an ellipsis when cut).
        want_skills: Produce ``result["skills"]`` (validated skill entries).
        want_instructions: Produce ``result["instructions"]`` (sanitised
            HTML per instruction field).

    Returns:
        Dict with the requested sections plus ``models_by_slug`` and
        ``model`` (skills model, else instructions model, else summary
        model, else the default model id).

    Raises:
        HTTPException: 400 for insufficient input, 500 for an unknown prompt
            slug, 502 for OpenRouter errors or unusable model output, 503 when
            the API key or a prompt is missing.
    """
    key = _require_openrouter_key()
    title = form_ctx.title
    goal = form_ctx.goal
    execution = form_ctx.execution
    preparation = form_ctx.preparation
    trainer_notes = form_ctx.trainer_notes
    focus_area_hint = form_ctx.focus_hint
    focus_areas_context = form_ctx.focus_area_tuples()
    g_plain = strip_html_to_plain(goal)
    e_plain = strip_html_to_plain(execution)

    # Input validation: the instruction rewrite has its own, wider notion of
    # "enough input"; all other modes need goal or execution text.
    if want_instructions:
        if not form_ctx.has_instruction_source_text():
            raise HTTPException(
                status_code=400,
                detail="Fuer Anleitungs-Ueberarbeitung mindestens Titel oder ein Anleitungsfeld ausfuellen.",
            )
    elif not (g_plain.strip() or e_plain.strip()):
        raise HTTPException(
            status_code=400,
            detail="Mindestens Ziel oder Durchfuehrung muss Inhalt liefern (nach Entfernen von leerem HTML).",
        )

    result: Dict[str, Any] = {}
    models_by_slug: Dict[str, str] = {}
    if _ai_debug_on():
        fid_list = ",".join(str(x) for x in _ordered_focus_ids(focus_areas_context))
        _LOGGER.warning(
            "AI_DEBUG exercise_ai suggest want_summary=%s want_skills=%s want_instructions=%s "
            "title_chars=%s goal_plain_chars=%s exec_plain_chars=%s focus_hint_chars=%s focus_ctx_ids=[%s]",
            want_summary,
            want_skills,
            want_instructions,
            len((title or "").strip()),
            len(g_plain),
            len(e_plain),
            len((focus_area_hint or "").strip()),
            fid_list,
        )

    # --- Summary -----------------------------------------------------------
    if want_summary:
        try:
            ctx = build_exercise_placeholder_variables(
                cur,
                slug="exercise_summary",
                title=title,
                goal=goal,
                execution=execution,
                focus_area_hint=focus_area_hint,
                focus_areas_context=focus_areas_context,
            )
        except ValueError as e:
            raise HTTPException(status_code=500, detail=str(e)) from e
        try:
            prow, rendered = load_and_render_ai_prompt(cur, "exercise_summary", ctx)
        except AiPromptUnavailableError:
            raise HTTPException(
                status_code=503,
                detail="Prompt exercise_summary nicht aktiv oder fehlt in DB.",
            ) from None
        model_summary = effective_openrouter_model_for_prompt_row(prow)
        models_by_slug["exercise_summary"] = model_summary
        prompt = rendered.text
        if _ai_debug_on():
            _LOGGER.warning(
                "AI_DEBUG exercise_ai summary prompt_slug=exercise_summary prompt_chars=%s placeholders_remaining=%s",
                len(prompt),
                len(rendered.placeholders_remaining),
            )
        try:
            raw = openrouter_chat_completion(api_key=key, model=model_summary, user_content=prompt)
        except OpenRouterError as e:
            raise HTTPException(status_code=502, detail=f"OpenRouter: {e}") from e
        if _ai_debug_on():
            _LOGGER.warning("AI_DEBUG exercise_ai summary response_chars=%s", len(raw or ""))
        text = (raw or "").strip()
        if not text:
            raise HTTPException(
                status_code=502,
                detail="OpenRouter/KI lieferte eine leere Kurzfassung (kein Modelltext).",
            )
        if len(text) > _MAX_SUMMARY_CHARS:
            # One character is reserved for the ellipsis marker.
            text = text[: _MAX_SUMMARY_CHARS - 1].rstrip() + "\u2026"
        result["summary"] = {"text": text, "ai_generated": True, "model": model_summary}

    # --- Skill suggestions ---------------------------------------------------
    if want_skills:
        try:
            ctx = build_exercise_placeholder_variables(
                cur,
                slug="exercise_skill_suggestions",
                title=title,
                goal=goal,
                execution=execution,
                focus_area_hint=focus_area_hint,
                focus_areas_context=focus_areas_context,
            )
        except ValueError as e:
            raise HTTPException(status_code=500, detail=str(e)) from e
        try:
            srow, rendered = load_and_render_ai_prompt(cur, "exercise_skill_suggestions", ctx)
        except AiPromptUnavailableError:
            raise HTTPException(
                status_code=503,
                detail="Prompt exercise_skill_suggestions nicht aktiv oder fehlt in DB.",
            ) from None
        model_skills = effective_openrouter_model_for_prompt_row(srow)
        models_by_slug["exercise_skill_suggestions"] = model_skills
        prompt = rendered.text
        if _ai_debug_on():
            _LOGGER.warning(
                "AI_DEBUG exercise_ai skills prompt_slug=exercise_skill_suggestions catalog_chars=%s prompt_chars=%s "
                "template_has_skills_placeholder=%s",
                len(ctx.get("skills_catalog") or ""),
                len(prompt),
                "{{skills_catalog}}" in str(srow.get("template") or ""),
            )
        sys_hint = (
            "Du antwortest nur mit validem JSON (Array). Keine Kommentare, keine Erklaerungen ausserhalb des JSON."
        )
        try:
            raw = openrouter_chat_completion(
                api_key=key,
                model=model_skills,
                user_content=prompt,
                system_content=sys_hint,
                temperature=0.15,
            )
        except OpenRouterError as e:
            raise HTTPException(status_code=502, detail=f"OpenRouter: {e}") from e
        if _ai_debug_on():
            _LOGGER.warning("AI_DEBUG exercise_ai skills response_chars=%s", len(raw or ""))
        body = (raw or "").strip()
        if not body:
            raise HTTPException(
                status_code=502,
                detail="OpenRouter/KI lieferte leeren Inhalt für Skill-JSON.",
            )
        # Prefer the first balanced array so surrounding prose is ignored.
        frag = _first_balanced_json_array(body)
        if frag:
            body = frag
        try:
            parsed = _extract_json_array(body)
        except (json.JSONDecodeError, ValueError) as e:
            if _ai_debug_on():
                _LOGGER.warning(
                    "AI_DEBUG exercise_ai skills JSON parse_failed err=%s head=%s",
                    e,
                    (body.replace("\r", "").replace("\n", " ").strip())[:400],
                )
            raise HTTPException(
                status_code=502,
                detail="KI lieferte kein verwertbares JSON fuer Skills.",
            ) from e
        skills = _sanitize_skill_entries(cur, parsed)
        if _ai_debug_on():
            cand_n = len(parsed) if isinstance(parsed, list) else -1
            _LOGGER.warning("AI_DEBUG exercise_ai skills parsed_len=%s sanitized_kept=%s", cand_n, len(skills))
        result["skills"] = skills

    # --- Instruction rewrite -------------------------------------------------
    if want_instructions:
        try:
            ctx = build_exercise_placeholder_variables(
                cur,
                slug="exercise_instruction_rewrite",
                title=title,
                goal=goal,
                execution=execution,
                preparation=preparation,
                trainer_notes=trainer_notes,
                focus_area_hint=focus_area_hint,
                focus_areas_context=focus_areas_context,
            )
        except ValueError as e:
            raise HTTPException(status_code=500, detail=str(e)) from e
        try:
            irow, rendered = load_and_render_ai_prompt(cur, "exercise_instruction_rewrite", ctx)
        except AiPromptUnavailableError:
            raise HTTPException(
                status_code=503,
                detail="Prompt exercise_instruction_rewrite nicht aktiv oder fehlt in DB.",
            ) from None
        model_instr = effective_openrouter_model_for_prompt_row(irow)
        models_by_slug["exercise_instruction_rewrite"] = model_instr
        prompt = rendered.text
        if _ai_debug_on():
            _LOGGER.warning(
                "AI_DEBUG exercise_ai instructions prompt_slug=exercise_instruction_rewrite prompt_chars=%s",
                len(prompt),
            )
        sys_hint = (
            "Du antwortest nur mit validem JSON-Objekt (Schluessel goal, execution, preparation, trainer_notes). "
            "Keine Kommentare ausserhalb des JSON."
        )
        try:
            raw = openrouter_chat_completion(
                api_key=key,
                model=model_instr,
                user_content=prompt,
                system_content=sys_hint,
                temperature=0.2,
            )
        except OpenRouterError as e:
            raise HTTPException(status_code=502, detail=f"OpenRouter: {e}") from e
        body = (raw or "").strip()
        if not body:
            raise HTTPException(
                status_code=502,
                detail="OpenRouter/KI lieferte leeren Inhalt fuer Anleitungs-Ueberarbeitung.",
            )
        try:
            parsed = _extract_instruction_rewrite_object(body)
        except (json.JSONDecodeError, ValueError) as e:
            if _ai_debug_on():
                _LOGGER.warning(
                    "AI_DEBUG exercise_ai instructions JSON parse_failed err=%s head=%s",
                    e,
                    (body.replace("\r", "").replace("\n", " ").strip())[:400],
                )
            raise HTTPException(
                status_code=502,
                detail="KI lieferte kein verwertbares JSON fuer die Anleitung.",
            ) from e
        # The original texts are passed along so media references the model
        # dropped can be re-attached.
        originals = {
            "goal": goal,
            "execution": execution,
            "preparation": preparation,
            "trainer_notes": trainer_notes,
        }
        fields = _sanitize_instruction_rewrite_payload(parsed, originals=originals)
        if not any((fields.get(k) or "").strip() for k in _INSTRUCTION_JSON_KEYS):
            raise HTTPException(
                status_code=502,
                detail="KI lieferte leere Anleitungs-Felder.",
            )
        result["instructions"] = {
            "fields": fields,
            "ai_generated": True,
            "model": model_instr,
        }

    result["models_by_slug"] = models_by_slug
    if want_skills:
        result["model"] = models_by_slug["exercise_skill_suggestions"]
    elif want_instructions:
        result["model"] = models_by_slug["exercise_instruction_rewrite"]
    elif want_summary:
        result["model"] = models_by_slug["exercise_summary"]
    else:
        result["model"] = default_openrouter_model_id()
    return result
# Public names of this module.
__all__ = [
    "build_contextual_skills_catalog_block",
    "build_exercise_placeholder_variables",
    "run_exercise_ai_suggestion",
    "strip_html_to_plain",
]