# shinkan-jinkendo/backend/exercise_ai.py

"""
KI-Vorschlaege fuer Uebungsformular: Laedt Prompts aus ai_prompts, ruft OpenRouter auf.
Keine persistente Aenderung an exercises — nur Response-DTO fuer das Frontend.

Skill-Katalog fuer Prompts: priorisierte Auswahl (ai_skill_retrieval_profiles, Fallback-Heuristik).
"""
from __future__ import annotations

import copy
import html
import json
import logging
import math
import os
import re
from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Sequence, Tuple

from fastapi import HTTPException

from openrouter_chat import (
    OpenRouterError,
    default_openrouter_model_id,
    effective_openrouter_model_for_prompt_row,
    normalize_openrouter_env,
    openrouter_chat_completion,
)

from ai_prompt_context import ExerciseFormAiPromptContext
from ai_prompt_runtime import AiPromptUnavailableError, load_and_render_ai_prompt
from exercise_rich_text import collect_inline_exercise_media_ids, normalize_inline_exercise_media_markup

# Module-level logger used for this module's diagnostic output.
_LOGGER = logging.getLogger("shinkan.exercise_ai")


def _ai_debug_on() -> bool:
    """Report whether AI debug logging is switched on via ``SHINKAN_AI_DEBUG``.

    The flag counts as enabled when its trimmed, lower-cased value is one of
    ``1``, ``true``, ``yes`` or ``full``; an unset variable or any other value
    means disabled.
    """
    flag = os.getenv("SHINKAN_AI_DEBUG", "")
    normalized = flag.strip().lower()
    return normalized in {"1", "true", "yes", "full"}


# Canonical skill-level slugs; values outside this set are mapped or rejected
# by _normalize_exercise_skill_level.
_CANONICAL_SKILL_LEVELS = frozenset({"basis", "grundlagen", "aufbau", "fortgeschritten", "optimierung"})
# Legacy spellings and numeric levels ("1".."5") mapped onto canonical slugs.
_LEGACY_SKILL_LEVEL_SLUG = {
    "einsteiger": "basis",
    "experte": "optimierung",
    "1": "basis",
    "2": "grundlagen",
    "3": "aufbau",
    "4": "fortgeschritten",
    "5": "optimierung",
}
# Intensity values accepted verbatim by _normalize_exercise_skill_intensity.
_ALLOWED_SKILL_INTENSITY = frozenset({"niedrig", "mittel", "hoch"})

# Matches any markup tag; strip_html_to_plain replaces each match with a space.
_TAG_RE = re.compile(r"<[^>]+>", re.IGNORECASE)
# Word tokens (ASCII letters, German umlauts/sharp s, digits) used for
# text-overlap scoring of skills against the exercise text.
_TOKEN_FIND = re.compile(r"[a-zäöüß0-9]+", re.IGNORECASE)

# Default character cap applied by strip_html_to_plain.
_MAX_PLAIN_FIELD = 28_000
# Upper bound on the number of skills selected for the prompt catalog.
_MAX_SKILLS_CATALOG_LINES = 240
# NOTE(review): not referenced in the visible part of this file — presumably
# caps the generated summary length; confirm against the caller.
_MAX_SUMMARY_CHARS = 220
# Maximum number of model-returned skill rows that are parsed and validated.
_MAX_SANITIZE_SKILL_INPUT_ROWS = 250

# Retrieval settings used when no profile table or profile row is available;
# also the base onto which loaded profiles are merged.
_FALLBACK_RETRIEVAL_CONFIG: Dict[str, Any] = {
    "version": 1,
    "importance_multiplier": 1.0,
    "text_overlap_bonus": 2.0,
    "main_slug_weights": {"karate": 1.0, "allgemeine": 1.0},
    "category_slug_weights": {},
    "category_max_share": {"kondition": 0.38, "koordination": 0.35},
    "main_min_share": {},
    "description_plain_max_len": 160,
    "karate_relevance_max_len": 72,
    "keyword_overrides": [],
}


def _normalize_exercise_skill_level(value) -> Optional[str]:
    """Map a raw skill-level value onto a canonical slug.

    The value is stringified, trimmed and lower-cased. Canonical slugs are
    returned unchanged, legacy spellings are translated through
    ``_LEGACY_SKILL_LEVEL_SLUG``. Returns ``None`` for ``None``, blank input
    or an unknown level.
    """
    slug = "" if value is None else str(value).strip().lower()
    if not slug:
        return None
    if slug not in _CANONICAL_SKILL_LEVELS:
        return _LEGACY_SKILL_LEVEL_SLUG.get(slug)
    return slug


def _normalize_exercise_skill_intensity(value) -> str:
    """Normalise an intensity value to ``niedrig``, ``mittel`` or ``hoch``.

    English ``low``/``medium``/``high`` are translated; values already in
    ``_ALLOWED_SKILL_INTENSITY`` pass through. ``None`` and anything
    unrecognised fall back to ``"mittel"``.
    """
    default = "mittel"
    if value is None:
        return default
    key = str(value).strip().lower()
    english_to_german = {"low": "niedrig", "medium": "mittel", "high": "hoch"}
    if key in english_to_german:
        return english_to_german[key]
    return key if key in _ALLOWED_SKILL_INTENSITY else default


def strip_html_to_plain(html: Optional[str], *, max_len: int = _MAX_PLAIN_FIELD) -> str:
    """Reduce markup to whitespace-normalised plain text.

    Every tag is replaced by a space, runs of whitespace collapse to a single
    space and the result is trimmed. Text longer than ``max_len`` is cut to
    ``max_len - 1`` characters (right-trimmed) and suffixed with an ellipsis.
    Falsy input yields an empty string.
    """
    if not html:
        return ""
    text = re.sub(r"\s+", " ", _TAG_RE.sub(" ", str(html))).strip()
    if len(text) <= max_len:
        return text
    return text[: max_len - 1].rstrip() + "…"


def _corpus_tokens(*parts: str) -> frozenset:
    """Collect the distinct lower-cased word tokens found in ``parts``.

    Empty or whitespace-only parts are ignored; tokens of length one are
    dropped.
    """
    usable = [piece.strip() for piece in parts if piece and piece.strip()]
    haystack = " ".join(usable)
    tokens = set()
    for match in _TOKEN_FIND.finditer(haystack):
        word = match.group(0).lower()
        if len(word) > 1:
            tokens.add(word)
    return frozenset(tokens)


def _ai_profiles_table_ready(cur) -> bool:
    """Check whether ``public.ai_skill_retrieval_profiles`` resolves via ``to_regclass``.

    Accepts both dict-style and sequence-style cursor rows. Returns ``False``
    when no row comes back or the resolved name is NULL/blank.
    """
    cur.execute("SELECT to_regclass(%s)::text AS t", ("public.ai_skill_retrieval_profiles",))
    record = cur.fetchone()
    if record is None:
        return False
    regclass = record["t"] if isinstance(record, dict) else record[0]
    if regclass is None:
        return False
    return bool(str(regclass).strip())


def _average_float_dict(dicts: Sequence[Mapping[str, Any]], *, fallback: float) -> Dict[str, float]:
    """Average numeric values key by key across several mappings.

    For every key present in any mapping, the mean of all values convertible
    to ``float`` is stored. Missing, ``None`` and non-numeric values are
    ignored; a key without any usable value receives ``fallback``.
    """
    keys: set = set()
    for mapping in dicts:
        keys |= set(mapping.keys())

    averaged: Dict[str, float] = {}
    for key in keys:
        numbers = []
        for mapping in dicts:
            candidate = mapping.get(key)
            if candidate is None:
                continue
            try:
                numbers.append(float(candidate))
            except (TypeError, ValueError):
                pass
        if numbers:
            averaged[key] = sum(numbers) / len(numbers)
        else:
            averaged[key] = fallback
    return averaged


def _merge_retrieval_configs(configs: Sequence[Dict[str, Any]]) -> Dict[str, Any]:
    """Merge several retrieval profiles into one effective configuration.

    Starts from a deep copy of ``_FALLBACK_RETRIEVAL_CONFIG`` and, when
    ``configs`` is non-empty:

    * replaces the weight/share dictionaries by the per-key average over all
      profiles (``karate`` and ``allgemeine`` main weights default to 1.0),
    * replaces each scalar setting by the average of the values that could be
      parsed (the two ``*_max_len`` settings are rounded to ``int``); a scalar
      no profile provides keeps its fallback value,
    * concatenates all ``keyword_overrides`` lists in profile order.

    Args:
        configs: Decoded profile configs; may be empty.

    Returns:
        A new dict; neither the inputs nor the fallback config are mutated.
    """
    base = copy.deepcopy(_FALLBACK_RETRIEVAL_CONFIG)
    if not configs:
        return base

    def _section(name: str) -> List[Any]:
        return [cfg.get(name) or {} for cfg in configs]

    def _parsed_values(name: str, cast) -> List[Any]:
        # A malformed value only invalidates this one field of this one
        # profile; the profile's other fields must still be considered.
        values = []
        for cfg in configs:
            raw = cfg.get(name)
            if raw is None:
                continue
            try:
                values.append(cast(raw))
            except (TypeError, ValueError):
                continue
        return values

    base["main_slug_weights"] = _average_float_dict(_section("main_slug_weights"), fallback=1.0)
    for slug in ("karate", "allgemeine"):
        base["main_slug_weights"].setdefault(slug, 1.0)
    base["category_slug_weights"] = _average_float_dict(_section("category_slug_weights"), fallback=1.0)
    base["category_max_share"] = _average_float_dict(_section("category_max_share"), fallback=1.0)
    base["main_min_share"] = _average_float_dict(_section("main_min_share"), fallback=0.0)

    for name in ("importance_multiplier", "text_overlap_bonus"):
        values = _parsed_values(name, float)
        if values:
            base[name] = sum(values) / len(values)
    for name in ("description_plain_max_len", "karate_relevance_max_len"):
        values = _parsed_values(name, int)
        if values:
            base[name] = int(round(sum(values) / len(values)))

    overrides: List[Any] = []
    for cfg in configs:
        overrides.extend(cfg.get("keyword_overrides") or [])
    base["keyword_overrides"] = overrides
    return base


def _mul_weight_dict(target: MutableMapping[str, float], patch: Mapping[str, Any]) -> None:
    """Multiply weights in ``target`` in place by the factors given in ``patch``.

    A key missing from ``target`` starts at 1.0. Factors that cannot be
    converted to ``float`` are skipped.
    """
    for slug in patch:
        try:
            factor = float(patch[slug])
        except (TypeError, ValueError):
            continue
        current = float(target.get(slug, 1.0))
        target[slug] = current * factor


def _apply_keyword_overrides(cfg: Dict[str, Any], corpus_lower: str) -> None:
    """Apply keyword-triggered patches from ``cfg["keyword_overrides"]`` in place.

    An override fires when any of its ``keywords_any`` occurs as a
    case-insensitive substring of ``corpus_lower``. A firing override
    multiplies ``category_slug_weights`` / ``main_slug_weights`` by the
    patch factors and tightens ``category_max_share`` (a cap is only ever
    lowered, starting from 1.0 when absent).

    ``cfg["category_max_share"]`` is always created if missing. Malformed
    entries (non-dict overrides or patches, a non-list overrides value) are
    skipped instead of raising, because the configuration comes from stored
    JSON that is not validated here.

    Args:
        cfg: Effective retrieval configuration; mutated in place.
        corpus_lower: Exercise text to search for keywords.
    """
    caps = cfg.setdefault("category_max_share", {})
    haystack = (corpus_lower or "").lower()
    if not haystack.strip():
        return
    overrides = cfg.get("keyword_overrides") or []
    if not isinstance(overrides, (list, tuple)):
        return

    for override in overrides:
        if not isinstance(override, dict):
            continue
        keywords = override.get("keywords_any") or []
        if isinstance(keywords, str):
            # A bare string is one keyword, not a sequence of characters.
            keywords = [keywords]
        needles = (str(kw or "").strip().lower() for kw in keywords)
        if not any(needle and needle in haystack for needle in needles):
            continue

        patch = override.get("patch") or {}
        if not isinstance(patch, dict):
            continue
        _mul_weight_dict(cfg.setdefault("category_slug_weights", {}), patch.get("category_slug_weights") or {})
        _mul_weight_dict(cfg.setdefault("main_slug_weights", {}), patch.get("main_slug_weights") or {})
        for slug, limit in (patch.get("category_max_share") or {}).items():
            try:
                limit_f = float(limit)
            except (TypeError, ValueError):
                continue
            caps[slug] = min(float(caps.get(slug, 1.0)), limit_f)


def _ordered_focus_ids(focus_ctx: Optional[Sequence[Tuple[int, bool]]]) -> List[int]:
    """Return the distinct focus-area ids, primary ones first, then by ascending id.

    Each entry is an ``(id, is_primary)`` pair. Entries whose id cannot be
    converted to a positive integer, or that are not pairs, are skipped.
    Ids are validated *before* sorting so that ``None`` or mixed-type ids
    cannot make the sort raise and numeric strings order numerically.

    Args:
        focus_ctx: Focus-area pairs, or ``None``.

    Returns:
        Deduplicated positive ids; empty when there is no usable input.
    """
    if not focus_ctx:
        return []

    ranked: List[Tuple[bool, int]] = []
    for entry in focus_ctx:
        try:
            raw_id, is_primary = entry
            focus_id = int(raw_id)
        except (TypeError, ValueError, OverflowError):
            continue
        if focus_id < 1:
            continue
        # False sorts before True, so primary entries come first.
        ranked.append((not is_primary, focus_id))
    ranked.sort()

    seen = set()
    ordered: List[int] = []
    for _secondary, focus_id in ranked:
        if focus_id not in seen:
            seen.add(focus_id)
            ordered.append(focus_id)
    return ordered


def _load_merged_retrieval_config(
    cur, focus_ctx: Optional[Sequence[Tuple[int, bool]]]
) -> Dict[str, Any]:
    """Load the retrieval profiles for the given focus areas and merge them.

    Returns a copy of the fallback config when the profile table does not
    exist. Otherwise the active profile of each focus area (in
    ``_ordered_focus_ids`` order) is collected; if none yields a usable
    config, the active default profile is tried. Whatever was collected is
    passed to ``_merge_retrieval_configs``.
    """
    if not _ai_profiles_table_ready(cur):
        return copy.deepcopy(_FALLBACK_RETRIEVAL_CONFIG)

    def _config_from_row(record) -> Optional[Dict[str, Any]]:
        # Accept dict-style and sequence-style rows; the config column may be
        # an already-decoded object or a JSON string.
        if not record:
            return None
        payload = record["config"] if isinstance(record, dict) else record[0]
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except json.JSONDecodeError:
                return None
        return payload if isinstance(payload, dict) else None

    collected: List[Dict[str, Any]] = []
    for focus_id in _ordered_focus_ids(focus_ctx):
        cur.execute(
            """
            SELECT config
            FROM ai_skill_retrieval_profiles
            WHERE active = true AND focus_area_id = %s
            LIMIT 1
            """,
            (focus_id,),
        )
        profile = _config_from_row(cur.fetchone())
        if profile is not None:
            collected.append(profile)

    if not collected:
        cur.execute(
            """
            SELECT config
            FROM ai_skill_retrieval_profiles
            WHERE active = true AND is_default = true
            LIMIT 1
            """
        )
        default_profile = _config_from_row(cur.fetchone())
        if default_profile is not None:
            collected.append(default_profile)

    return _merge_retrieval_configs(collected)


def _fetch_all_active_skills_for_catalog(cur) -> List[Dict[str, Any]]:
    """Fetch every skill whose status is NULL or ``active`` as plain dicts.

    Each row carries the skill columns plus ``main_slug``, ``category_slug``
    (both empty string when the join finds nothing) and ``subcategory_name``.
    """
    cur.execute(
        """
        SELECT s.id,
               s.name,
               s.category,
               s.description,
               s.karate_relevance,
               s.relevance_level,
               s.importance,
               COALESCE(m.slug, '') AS main_slug,
               COALESCE(c.slug, '') AS category_slug,
               c.name AS subcategory_name
        FROM skills s
        LEFT JOIN skill_main_categories m ON m.id = s.main_category_id
        LEFT JOIN skill_categories c ON c.id = s.category_id
        WHERE (s.status IS NULL OR s.status = 'active')
        """
    )
    fetched = cur.fetchall()
    return list(map(dict, fetched))


def _score_skill_row(
    row: Mapping[str, Any],
    cfg: Mapping[str, Any],
    corpus_tokens: frozenset,
) -> float:
    """Compute the retrieval score of one skill row.

    The base score is ``importance`` (clamped to 1..5, default 3) times the
    importance multiplier times the main- and category-slug weights (each at
    least 0.05). On top, every corpus token that occurs as a substring of the
    skill's name, description or category texts adds ``text_overlap_bonus``.
    """
    main_slug = str(row.get("main_slug") or "").strip().lower()
    cat_slug = str(row.get("category_slug") or "").strip().lower()
    main_weights = cfg.get("main_slug_weights") or {}
    category_weights = cfg.get("category_slug_weights") or {}
    main_weight = float(main_weights.get(main_slug, 1.0))
    category_weight = float(category_weights.get(cat_slug, 1.0))

    importance = 3
    if row.get("importance") is not None:
        try:
            importance = int(row["importance"])
        except (TypeError, ValueError):
            importance = 3
    importance = max(1, min(5, importance))

    multiplier = float(cfg.get("importance_multiplier") or 1.0)
    base_score = float(importance) * multiplier * max(main_weight, 0.05) * max(category_weight, 0.05)

    searchable_fields = [
        strip_html_to_plain(row.get("name"), max_len=400),
        strip_html_to_plain(row.get("description"), max_len=520),
        cat_slug.replace("_", " "),
        str(row.get("category") or ""),
        str(row.get("subcategory_name") or ""),
    ]
    searchable = " ".join(searchable_fields).lower()

    hit_count = 0
    for token in corpus_tokens:
        if token and token in searchable:
            hit_count += 1
    bonus_per_hit = float(cfg.get("text_overlap_bonus") or 0.0)

    return base_score + hit_count * bonus_per_hit


def _category_cap_limits(cfg: Mapping[str, Any], n_max: int) -> Dict[str, int]:
    """Translate ``category_max_share`` fractions into absolute row limits.

    A share strictly between 0 and 1 becomes ``floor(share * n_max)`` (at
    least 1); a share of 1 or more becomes a limit far above ``n_max``
    (effectively uncapped). Blank slugs, non-numeric and non-positive shares
    are left out, as is everything when the setting is not a dict.
    """
    limits: Dict[str, int] = {}
    shares = cfg.get("category_max_share") or {}
    if not isinstance(shares, dict):
        return limits

    for slug, raw_share in shares.items():
        key = str(slug or "").strip()
        if not key:
            continue
        try:
            share = float(raw_share)
        except (TypeError, ValueError):
            continue
        if share >= 1.0:
            limits[key] = n_max + 99999
        elif share > 0:
            limits[key] = max(1, int(math.floor(share * n_max)))
    return limits


def _pick_catalog_rows(rows_scored: List[Tuple[float, Dict[str, Any]]], cfg: Mapping[str, Any]) -> List[Dict[str, Any]]:
    """Select at most ``_MAX_SKILLS_CATALOG_LINES`` rows by descending score.

    ``rows_scored`` holds ``(score, row)`` pairs in any order. Rows are ranked
    by score (ties by name). A first pass honours the per-category limits from
    ``_category_cap_limits``; if slots remain, a second pass fills them with
    the best remaining rows regardless of caps. Rows are deduplicated by
    ``id``.
    """
    limit = _MAX_SKILLS_CATALOG_LINES
    caps = _category_cap_limits(cfg, limit)
    ranked = sorted(rows_scored, key=lambda pair: (-pair[0], str(pair[1].get("name") or "")))

    chosen: List[Dict[str, Any]] = []
    chosen_ids: set = set()
    per_category: Dict[str, int] = {}

    # First round enforces category caps, second round tops up ignoring them.
    for enforce_caps in (True, False):
        for _score, row in ranked:
            if len(chosen) >= limit:
                break
            row_id = row["id"]
            if row_id in chosen_ids:
                continue
            slug = str(row.get("category_slug") or "").strip().lower()
            if enforce_caps and slug and slug in caps and per_category.get(slug, 0) >= caps[slug]:
                continue
            chosen.append(row)
            chosen_ids.add(row_id)
            if enforce_caps and slug:
                per_category[slug] = per_category.get(slug, 0) + 1

    return chosen[:limit]


def _format_skill_catalog_line(row: Mapping[str, Any], cfg: Mapping[str, Any]) -> str:
    """Render one skill row as a single ``key=value`` catalog line.

    The line always contains id, name, category path and description; the
    karate-relevance part is appended only when ``karate_relevance_max_len``
    is positive and the row has relevance text or a relevance level. Empty
    values are shown as ``-``.
    """
    skill_id = int(row["id"])
    label = (row.get("name") or "").strip() or f"Skill #{skill_id}"
    legacy_category = str(row.get("category") or "").strip()
    subcategory = str(row.get("subcategory_name") or "").strip()
    main_slug = str(row.get("main_slug") or "").strip()
    # An empty slug upper-cases to "", which the filter below drops.
    category_path = " / ".join(
        piece for piece in (main_slug.upper(), legacy_category, subcategory) if piece
    )

    description_cap = int(cfg.get("description_plain_max_len") or 160)
    description = strip_html_to_plain(row.get("description"), max_len=max(40, min(400, description_cap)))

    relevance_cap = int(cfg.get("karate_relevance_max_len") or 0)
    show_relevance = relevance_cap > 0
    if show_relevance:
        relevance = strip_html_to_plain(row.get("karate_relevance"), max_len=min(280, relevance_cap))
    else:
        relevance = ""
    level = row.get("relevance_level")
    level_text = "" if level is None else str(level).strip()

    line = f"- id={skill_id} | name={label}"
    line += f" | kategorie={category_path or '-'}"
    line += f" | beschreibung={description or '-'}"
    if show_relevance and (relevance.strip() or level_text):
        line += f" | karate_relevanz={relevance or '-'} | relevanz_stufe={level_text or '-'}"
    return line


def _safe_int_importance(value: Any) -> int:
    """Coerce an importance value to an int in 1..5, or 0 when unusable.

    Values that cannot be converted and a converted value of zero give 0;
    every other integer is clamped into the range 1..5.
    """
    try:
        number = int(value)
    except (TypeError, ValueError):
        return 0
    if not number:
        return 0
    if number < 1:
        return 1
    if number > 5:
        return 5
    return number


def build_contextual_skills_catalog_block(
    cur,
    *,
    title: Optional[str],
    goal_plain: str,
    execution_plain: str,
    focus_hint: Optional[str],
    focus_ctx: Optional[Sequence[Tuple[int, bool]]],
) -> str:
    """Build the skills catalog text block for the skill-suggestion prompt.

    Loads the merged retrieval config for the focus areas, applies keyword
    overrides against the exercise text, scores all active skills, picks the
    best ones and renders them one per line, ordered by importance
    (descending) and then name. Returns a fixed placeholder line when no
    skill is available.
    """
    cfg = _load_merged_retrieval_config(cur, focus_ctx)
    corpus_pieces = [title or "", goal_plain or "", execution_plain or "", focus_hint or ""]
    _apply_keyword_overrides(cfg, " ".join(corpus_pieces).lower())

    tokens = _corpus_tokens(title or "", goal_plain, execution_plain, focus_hint or "")
    candidates = [
        (_score_skill_row(skill, cfg, tokens), skill)
        for skill in _fetch_all_active_skills_for_catalog(cur)
    ]

    def _display_order(skill: Mapping[str, Any]):
        return (-_safe_int_importance(skill.get("importance")), str(skill.get("name") or "").lower())

    selected = sorted(_pick_catalog_rows(candidates, cfg), key=_display_order)
    if not selected:
        return "(keine aktiven Skills im Katalog)"
    return "\n".join(_format_skill_catalog_line(skill, cfg) for skill in selected)


# Per-field caps on the plain-text length of AI-rewritten instruction fields.
_MAX_INSTRUCTION_GOAL_PLAIN = 4_000
_MAX_INSTRUCTION_EXECUTION_PLAIN = 12_000
_MAX_INSTRUCTION_PREP_PLAIN = 2_500
_MAX_INSTRUCTION_TRAINER_PLAIN = 2_500

# Keys read from the instruction-rewrite JSON object, in output order.
_INSTRUCTION_JSON_KEYS = ("goal", "execution", "preparation", "trainer_notes")
# Maps each instruction key to its plain-text cap.
_INSTRUCTION_FIELD_MAX_PLAIN = {
    "goal": _MAX_INSTRUCTION_GOAL_PLAIN,
    "execution": _MAX_INSTRUCTION_EXECUTION_PLAIN,
    "preparation": _MAX_INSTRUCTION_PREP_PLAIN,
    "trainer_notes": _MAX_INSTRUCTION_TRAINER_PLAIN,
}

# Matches opening/closing tags whose name is NOT one of
# p, ul, ol, li, strong, b, em, i, br, span (those are kept).
_DISALLOWED_HTML_TAG_RE = re.compile(
    r"</?\s*(?!p\b|ul\b|ol\b|li\b|strong\b|b\b|em\b|i\b|br\b|span\b)[a-zA-Z][^>]*>",
    re.IGNORECASE,
)
# Matches complete <script>/<style> elements including their content
# (case-insensitive, spanning newlines).
_SCRIPT_STYLE_RE = re.compile(r"(?is)<(script|style)[^>]*>.*?</\1>")


def _plain_to_minimal_instruction_html(text: str) -> str:
    """Wrap plain text into HTML-escaped ``<p>`` paragraphs.

    The text is split on line breaks; each non-blank line becomes one
    paragraph. Blank or falsy input yields an empty string.
    """
    stripped = (text or "").strip()
    if not stripped:
        return ""
    paragraphs = []
    for chunk in re.split(r"\n+", stripped):
        chunk = chunk.strip()
        if chunk:
            paragraphs.append(f"<p>{html.escape(chunk)}</p>")
    return "".join(paragraphs)


def _truncate_plain(text: str, max_len: int) -> str:
    """Trim ``text`` and shorten it to at most ``max_len`` characters.

    Text that fits is returned trimmed. Longer text is cut to
    ``max_len - 1`` characters, right-trimmed, and suffixed with an ellipsis.
    """
    trimmed = (text or "").strip()
    if len(trimmed) > max_len:
        return trimmed[: max_len - 1].rstrip() + "…"
    return trimmed


def _sanitize_instruction_field_html(raw: Any, *, max_plain: int) -> str:
    """Clean one AI-produced instruction field and cap its plain-text length.

    Steps, in order: strip a leading markdown code fence, drop complete
    ``<script>``/``<style>`` elements, drop every tag not on the allow-list
    of ``_DISALLOWED_HTML_TAG_RE``, then either wrap tag-free text into
    paragraphs or pass the markup through
    ``normalize_inline_exercise_media_markup``. If the resulting plain text
    exceeds ``max_plain`` characters, it is truncated and re-wrapped as plain
    paragraphs (any remaining formatting is lost in that case).

    Returns an empty string for ``None`` or blank input.

    NOTE(review): attributes on allowed tags are not removed by the regexes
    here — presumably ``normalize_inline_exercise_media_markup`` or the
    frontend handles that; confirm.
    """
    if raw is None:
        return ""
    s = str(raw).strip()
    if not s:
        return ""
    # Remove an opening ```lang fence and, if present, the closing fence.
    if s.startswith("```"):
        s = re.sub(r"^```[a-zA-Z0-9]*\s*", "", s)
        if s.endswith("```"):
            s = s[:-3].strip()
    s = _SCRIPT_STYLE_RE.sub("", s)
    s = _DISALLOWED_HTML_TAG_RE.sub("", s)
    if "<" not in s:
        # No markup left: treat as plain text and build escaped paragraphs.
        s = _plain_to_minimal_instruction_html(s)
    else:
        s = normalize_inline_exercise_media_markup(s) or ""
    # Allow some slack beyond max_plain so the length check below can detect overflow.
    plain = strip_html_to_plain(s, max_len=max_plain + 200)
    if len(plain) > max_plain:
        plain = _truncate_plain(plain, max_plain)
        s = _plain_to_minimal_instruction_html(plain)
    return (normalize_inline_exercise_media_markup(s) or "").strip()


def _merge_preserved_inline_media(original: Optional[str], revised: str) -> str:
    """Append media references from the original text that the revision lost.

    Media ids found in ``original`` but not in ``revised`` are re-added, in
    sorted order, as inline-media ``<span>`` placeholders inside one trailing
    ``<p>``. The trimmed revision is returned unchanged when nothing is
    missing.
    """
    result = (revised or "").strip()
    original_ids = collect_inline_exercise_media_ids(original)
    if not original_ids:
        return result
    lost_ids = sorted(original_ids - collect_inline_exercise_media_ids(result))
    if not lost_ids:
        return result
    placeholders = "".join(
        f'<span data-shinkan-exercise-media="{mid}" data-shinkan-exercise-media-size="medium" '
        f'class="shinkan-inline-media"></span>'
        for mid in lost_ids
    )
    block = f"<p>{placeholders}</p>"
    if result:
        return (result + block).strip()
    return block


def _first_balanced_json_object(text: str) -> Optional[str]:
    """Return the first brace-balanced ``{...}`` substring of ``text``.

    Scanning starts at the first ``{``. Braces inside double-quoted strings
    (with backslash escapes) are ignored. Returns ``None`` when there is no
    ``{`` or the object never closes.
    """
    start = text.find("{")
    if start < 0:
        return None
    nesting = 0
    inside_string = False
    escaped = False
    for offset, char in enumerate(text[start:]):
        if inside_string:
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                inside_string = False
        elif char == '"':
            inside_string = True
        elif char == "{":
            nesting += 1
        elif char == "}":
            nesting -= 1
            if nesting == 0:
                return text[start : start + offset + 1]
    return None


def _extract_instruction_rewrite_object(text: str) -> Dict[str, Any]:
    """Parse the JSON object contained in a model reply.

    A surrounding markdown code fence is removed and, when a balanced
    ``{...}`` fragment is found, only that fragment is decoded.

    Raises:
        ValueError: If the reply is empty, is not valid JSON
            (``json.JSONDecodeError``), or does not decode to an object.
    """
    payload = (text or "").strip()
    if not payload:
        raise ValueError("leer")
    if payload.startswith("```"):
        payload = re.sub(r"^```[a-zA-Z0-9]*\s*", "", payload)
        if payload.endswith("```"):
            payload = payload[:-3].strip()
    candidate = _first_balanced_json_object(payload) or payload
    decoded = json.loads(candidate)
    if isinstance(decoded, dict):
        return decoded
    raise ValueError("kein JSON-Objekt")


def _sanitize_instruction_rewrite_payload(
    parsed: Mapping[str, Any],
    *,
    originals: Mapping[str, Optional[str]],
) -> Dict[str, str]:
    """Sanitise every instruction field of a parsed rewrite response.

    For each key in ``_INSTRUCTION_JSON_KEYS`` the model value is cleaned
    with the field's plain-text cap, then media references present in the
    corresponding original text but missing from the rewrite are re-appended.
    The result always contains all instruction keys.
    """
    cleaned: Dict[str, str] = {}
    for field in _INSTRUCTION_JSON_KEYS:
        limit = _INSTRUCTION_FIELD_MAX_PLAIN[field]
        markup = _sanitize_instruction_field_html(parsed.get(field), max_plain=limit)
        cleaned[field] = _merge_preserved_inline_media(originals.get(field), markup)
    return cleaned


def build_exercise_placeholder_variables(
    cur,
    *,
    slug: str,
    title: Optional[str],
    goal: Optional[str],
    execution: Optional[str],
    focus_area_hint: Optional[str],
    focus_areas_context: Optional[Sequence[Tuple[int, bool]]],
    preparation: Optional[str] = None,
    trainer_notes: Optional[str] = None,
) -> Dict[str, str]:
    """Build the ``{{placeholder}}`` variable map for an exercise AI prompt slug.

    ``pipeline`` gets an empty map. ``exercise_summary`` gets title, focus
    area, goal and execution; ``exercise_instruction_rewrite`` additionally
    gets preparation and trainer notes; ``exercise_skill_suggestions`` gets
    all of those plus ``skills_catalog``. Rich-text fields are reduced to
    plain text and empty values are rendered as ``-``.

    Raises:
        ValueError: If no placeholder context is defined for ``slug``.
    """
    normalized_slug = (slug or "").strip().lower()
    if normalized_slug == "pipeline":
        return {}

    goal_text = strip_html_to_plain(goal)
    execution_text = strip_html_to_plain(execution)
    preparation_text = strip_html_to_plain(preparation)
    notes_text = strip_html_to_plain(trainer_notes)
    clean_title = (title or "").strip()
    clean_focus = (focus_area_hint or "").strip()

    variables: Dict[str, str] = {
        "exercise_title": clean_title or "-",
        "exercise_focus_area": clean_focus or "-",
        "exercise_goal": goal_text or "-",
        "exercise_execution": execution_text or "-",
        "exercise_preparation": preparation_text or "-",
        "exercise_trainer_notes": notes_text or "-",
    }

    if normalized_slug == "exercise_instruction_rewrite":
        return variables
    if normalized_slug == "exercise_summary":
        summary_keys = ("exercise_title", "exercise_focus_area", "exercise_goal", "exercise_execution")
        return {name: variables[name] for name in summary_keys}
    if normalized_slug == "exercise_skill_suggestions":
        variables["skills_catalog"] = build_contextual_skills_catalog_block(
            cur,
            title=clean_title,
            goal_plain=goal_text,
            execution_plain=execution_text,
            focus_hint=clean_focus or None,
            focus_ctx=focus_areas_context,
        )
        return variables
    raise ValueError(f"Kein Platzhalter-Kontext fuer slug={slug!r} definiert.")


def _first_balanced_json_array(text: str) -> Optional[str]:
    """Find the first fully closed top-level JSON array in arbitrary running text.

    Scanning starts at the first ``[``. Brackets inside double-quoted strings
    (with backslash escapes) are ignored. Returns ``None`` when there is no
    ``[`` or the array never closes.
    """
    start = text.find("[")
    if start < 0:
        return None
    level = 0
    quoted = False
    pending_escape = False
    pos = start
    end = len(text)
    while pos < end:
        char = text[pos]
        pos += 1
        if quoted:
            if pending_escape:
                pending_escape = False
            elif char == "\\":
                pending_escape = True
            elif char == '"':
                quoted = False
            continue
        if char == '"':
            quoted = True
        elif char == "[":
            level += 1
        elif char == "]":
            level -= 1
            if level == 0:
                # pos already points one past the closing bracket.
                return text[start:pos]
    return None


def _extract_json_array(text: str) -> Any:
    """Decode the skills list from a model reply.

    A surrounding markdown code fence is removed first. A reply starting with
    ``[`` is decoded up to its last ``]``; a reply starting with ``{`` must
    hold a list under ``skills``, ``items`` or ``data``; anything else is
    decoded as-is. Lists are cut to ``_MAX_SANITIZE_SKILL_INPUT_ROWS``
    entries.

    Raises:
        ValueError: If a JSON object contains no skills list, or the text is
            not valid JSON (``json.JSONDecodeError``).
    """

    def _capped(value: Any) -> Any:
        if isinstance(value, list) and len(value) > _MAX_SANITIZE_SKILL_INPUT_ROWS:
            return value[:_MAX_SANITIZE_SKILL_INPUT_ROWS]
        return value

    payload = text.strip()
    if payload.startswith("```"):
        payload = re.sub(r"^```[a-zA-Z0-9]*\s*", "", payload)
        if payload.endswith("```"):
            payload = payload[:-3].strip()

    if payload.startswith("["):
        last_bracket = payload.rfind("]")
        if last_bracket > 0:
            return _capped(json.loads(payload[: last_bracket + 1]))

    if payload.startswith("{"):
        decoded = json.loads(payload)
        if isinstance(decoded, dict):
            for list_key in ("skills", "items", "data"):
                candidate = decoded.get(list_key)
                if isinstance(candidate, list):
                    return _capped(candidate)
        raise ValueError("JSON-Objekt ohne Skills-Liste")

    return _capped(json.loads(payload))


def _sanitize_skill_entries(cur, rows: Any) -> List[Dict[str, Any]]:
    """Validate model-suggested skill rows against the database.

    At most ``_MAX_SANITIZE_SKILL_INPUT_ROWS`` input rows are inspected and at
    most five suggestions are returned. A row is kept only when it is a dict
    whose ``skill_id`` converts to ``int`` and refers to a skill with status
    NULL or ``active``. Levels and intensity are normalised; ``is_primary``
    defaults to ``True`` only for the first accepted row when the model did
    not state it.

    Args:
        cur: Database cursor returning dict-style rows.
        rows: Decoded model output; anything but a list yields ``[]``.

    Returns:
        Suggestion dicts with ``skill_id``, ``skill_name``,
        ``required_level``, ``target_level``, ``intensity``, ``is_primary``
        and, when available, ``skill_category`` and ``confidence``.
    """
    if not isinstance(rows, list):
        return []

    max_suggestions = 5
    out: List[Dict[str, Any]] = []
    for raw in rows[:_MAX_SANITIZE_SKILL_INPUT_ROWS]:
        if len(out) >= max_suggestions:
            break
        if not isinstance(raw, dict):
            continue
        try:
            skill_id = int(raw.get("skill_id"))
        except (TypeError, ValueError):
            continue
        cur.execute(
            """
            SELECT s.id, s.name, s.category,
                   sc.name AS subcategory_name
            FROM skills s
            LEFT JOIN skill_categories sc ON s.category_id = sc.id
            WHERE s.id = %s AND (s.status IS NULL OR s.status = 'active')
            """,
            (skill_id,),
        )
        sk = cur.fetchone()
        if not sk:
            continue

        # _normalize_exercise_skill_level returns a canonical slug or None
        # (legacy spellings are already mapped there), so a plain fallback
        # is sufficient and no second normalisation pass is needed.
        req = _normalize_exercise_skill_level(raw.get("required_level")) or "grundlagen"
        tgt = _normalize_exercise_skill_level(raw.get("target_level")) or req

        inten = _normalize_exercise_skill_intensity(raw.get("intensity"))

        stated_primary = raw.get("is_primary")
        is_primary = bool(stated_primary) if stated_primary is not None else len(out) == 0

        cat = (sk.get("category") or "").strip()
        sub = (sk.get("subcategory_name") or "").strip()
        skill_category = " / ".join(x for x in (cat, sub) if x) or (cat or None)

        conf = raw.get("confidence")
        try:
            conf_f = float(conf) if conf is not None else None
        except (TypeError, ValueError):
            conf_f = None

        item: Dict[str, Any] = {
            "skill_id": skill_id,
            "skill_name": (sk.get("name") or "").strip() or f"Skill #{skill_id}",
            "required_level": req,
            "target_level": tgt,
            "intensity": inten,
            "is_primary": is_primary,
        }
        if skill_category:
            item["skill_category"] = skill_category
        if conf_f is not None:
            item["confidence"] = conf_f
        out.append(item)

    return out


def _require_openrouter_key() -> str:
    """Return the configured OpenRouter API key.

    Raises:
        HTTPException: 503 when ``normalize_openrouter_env`` yields no key.
    """
    api_key, _unused = normalize_openrouter_env()
    if api_key:
        return api_key
    raise HTTPException(
        status_code=503,
        detail="KI nicht konfiguriert (OPENROUTER_API_KEY fehlt).",
    )


def _render_exercise_ai_prompt_or_raise(
    cur,
    slug: str,
    **placeholder_kwargs: Any,
) -> Tuple[Any, Any, Any, str]:
    """Build the placeholder variables for *slug*, render its prompt and resolve the model.

    Args:
        cur: Database cursor handed through to the placeholder builder and prompt loader.
        slug: Prompt slug (e.g. ``"exercise_summary"``).
        **placeholder_kwargs: Form values forwarded unchanged to
            ``build_exercise_placeholder_variables``.

    Returns:
        ``(ctx, prompt_row, rendered, model)`` where ``ctx`` are the placeholder
        variables, ``prompt_row``/``rendered`` come from ``load_and_render_ai_prompt``
        and ``model`` is the effective OpenRouter model for that prompt row.

    Raises:
        HTTPException: 500 if the placeholder builder raises ``ValueError``;
            503 if the prompt is unavailable.
    """
    try:
        ctx = build_exercise_placeholder_variables(cur, slug=slug, **placeholder_kwargs)
    except ValueError as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    try:
        prompt_row, rendered = load_and_render_ai_prompt(cur, slug, ctx)
    except AiPromptUnavailableError:
        raise HTTPException(
            status_code=503,
            detail=f"Prompt {slug} nicht aktiv oder fehlt in DB.",
        ) from None
    return ctx, prompt_row, rendered, effective_openrouter_model_for_prompt_row(prompt_row)


def _openrouter_text_or_raise(
    *,
    api_key: str,
    model: str,
    prompt: str,
    debug_label: str,
    empty_detail: str,
    **chat_kwargs: Any,
) -> str:
    """Call OpenRouter and return the stripped, non-empty reply text.

    Args:
        api_key: OpenRouter API key.
        model: Model id to use for the completion.
        prompt: Rendered prompt, sent as user content.
        debug_label: Short tag used in the AI_DEBUG log line (e.g. ``"summary"``).
        empty_detail: Error detail used when the model returns no text.
        **chat_kwargs: Optional extra arguments (e.g. ``system_content``,
            ``temperature``) forwarded to ``openrouter_chat_completion``. Only
            explicitly given options are passed so the callee's defaults apply.

    Raises:
        HTTPException: 502 on ``OpenRouterError`` or on an empty reply.
    """
    try:
        raw = openrouter_chat_completion(
            api_key=api_key,
            model=model,
            user_content=prompt,
            **chat_kwargs,
        )
    except OpenRouterError as e:
        raise HTTPException(status_code=502, detail=f"OpenRouter: {e}") from e
    if _ai_debug_on():
        _LOGGER.warning("AI_DEBUG exercise_ai %s response_chars=%s", debug_label, len(raw or ""))
    text = (raw or "").strip()
    if not text:
        raise HTTPException(status_code=502, detail=empty_detail)
    return text


def run_exercise_ai_suggestion(
    cur,
    *,
    form_ctx: ExerciseFormAiPromptContext,
    want_summary: bool,
    want_skills: bool,
    want_instructions: bool = False,
) -> Dict[str, Any]:
    """Produce AI suggestions for the exercise form and return them as a response dict.

    Depending on the flags, up to three independent OpenRouter calls are made
    (summary, skill suggestions, instruction rewrite), each with its own prompt
    slug and model.

    Args:
        cur: Database cursor used for prompt loading and skill sanitizing.
        form_ctx: Current form values (title, goal, execution, ...).
        want_summary: Generate a short summary (``result["summary"]``).
        want_skills: Generate skill suggestions (``result["skills"]``).
        want_instructions: Rewrite the instruction fields (``result["instructions"]``).

    Returns:
        Dict with the requested sections plus ``"models_by_slug"`` and ``"model"``.
        ``"model"`` is the model of the highest-priority requested section
        (skills, then instructions, then summary) or the default model id if
        nothing was requested.

    Raises:
        HTTPException: 400 for insufficient form input, 500 for placeholder
            errors, 502 for OpenRouter failures or unusable model output,
            503 for a missing API key or unavailable prompt.
    """
    key = _require_openrouter_key()

    title = form_ctx.title
    goal = form_ctx.goal
    execution = form_ctx.execution
    preparation = form_ctx.preparation
    trainer_notes = form_ctx.trainer_notes
    focus_area_hint = form_ctx.focus_hint
    focus_areas_context = form_ctx.focus_area_tuples()

    g_plain = strip_html_to_plain(goal)
    e_plain = strip_html_to_plain(execution)
    if want_instructions:
        # The rewrite can work from any instruction field or the title alone.
        if not form_ctx.has_instruction_source_text():
            raise HTTPException(
                status_code=400,
                detail="Fuer Anleitungs-Ueberarbeitung mindestens Titel oder ein Anleitungsfeld ausfuellen.",
            )
    elif not (g_plain.strip() or e_plain.strip()):
        raise HTTPException(
            status_code=400,
            detail="Mindestens Ziel oder Durchfuehrung muss Inhalt liefern (nach Entfernen von leerem HTML).",
        )

    result: Dict[str, Any] = {}
    models_by_slug: Dict[str, str] = {}

    # Placeholder inputs shared by all three prompts; the instruction rewrite adds two more.
    base_placeholders: Dict[str, Any] = {
        "title": title,
        "goal": goal,
        "execution": execution,
        "focus_area_hint": focus_area_hint,
        "focus_areas_context": focus_areas_context,
    }

    if _ai_debug_on():
        fid_list = ",".join(str(x) for x in _ordered_focus_ids(focus_areas_context))
        _LOGGER.warning(
            "AI_DEBUG exercise_ai suggest want_summary=%s want_skills=%s want_instructions=%s "
            "title_chars=%s goal_plain_chars=%s exec_plain_chars=%s focus_hint_chars=%s focus_ctx_ids=[%s]",
            want_summary,
            want_skills,
            want_instructions,
            len((title or "").strip()),
            len(g_plain),
            len(e_plain),
            len((focus_area_hint or "").strip()),
            fid_list,
        )

    if want_summary:
        _ctx, _prow, rendered, model_summary = _render_exercise_ai_prompt_or_raise(
            cur, "exercise_summary", **base_placeholders
        )
        models_by_slug["exercise_summary"] = model_summary
        prompt = rendered.text
        if _ai_debug_on():
            _LOGGER.warning(
                "AI_DEBUG exercise_ai summary prompt_slug=exercise_summary prompt_chars=%s placeholders_remaining=%s",
                len(prompt),
                len(rendered.placeholders_remaining),
            )
        # No system prompt / temperature here: the summary uses the client's defaults.
        text = _openrouter_text_or_raise(
            api_key=key,
            model=model_summary,
            prompt=prompt,
            debug_label="summary",
            empty_detail="OpenRouter/KI lieferte eine leere Kurzfassung (kein Modelltext).",
        )
        if len(text) > _MAX_SUMMARY_CHARS:
            # Reserve one character for the ellipsis so the limit is never exceeded.
            text = text[: _MAX_SUMMARY_CHARS - 1].rstrip() + "…"
        result["summary"] = {"text": text, "ai_generated": True, "model": model_summary}

    if want_skills:
        ctx, srow, rendered, model_skills = _render_exercise_ai_prompt_or_raise(
            cur, "exercise_skill_suggestions", **base_placeholders
        )
        models_by_slug["exercise_skill_suggestions"] = model_skills
        prompt = rendered.text
        if _ai_debug_on():
            _LOGGER.warning(
                "AI_DEBUG exercise_ai skills prompt_slug=exercise_skill_suggestions catalog_chars=%s prompt_chars=%s "
                "template_has_skills_placeholder=%s",
                len(ctx.get("skills_catalog") or ""),
                len(prompt),
                "{{skills_catalog}}" in str(srow.get("template") or ""),
            )
        sys_hint = (
            "Du antwortest nur mit validem JSON (Array). Keine Kommentare, keine Erklaerungen ausserhalb des JSON."
        )
        body = _openrouter_text_or_raise(
            api_key=key,
            model=model_skills,
            prompt=prompt,
            debug_label="skills",
            empty_detail="OpenRouter/KI lieferte leeren Inhalt für Skill-JSON.",
            system_content=sys_hint,
            temperature=0.15,
        )
        # Prefer the first balanced [...] fragment in case the model wrapped the JSON in prose.
        frag = _first_balanced_json_array(body)
        if frag:
            body = frag
        try:
            parsed = _extract_json_array(body)
        except (json.JSONDecodeError, ValueError) as e:
            if _ai_debug_on():
                _LOGGER.warning(
                    "AI_DEBUG exercise_ai skills JSON parse_failed err=%s head=%s",
                    e,
                    (body.replace("\r", "").replace("\n", " ").strip())[:400],
                )
            raise HTTPException(
                status_code=502,
                detail="KI lieferte kein verwertbares JSON fuer Skills.",
            ) from e
        skills = _sanitize_skill_entries(cur, parsed)
        if _ai_debug_on():
            cand_n = len(parsed) if isinstance(parsed, list) else -1
            _LOGGER.warning("AI_DEBUG exercise_ai skills parsed_len=%s sanitized_kept=%s", cand_n, len(skills))

        result["skills"] = skills

    if want_instructions:
        _ctx, _irow, rendered, model_instr = _render_exercise_ai_prompt_or_raise(
            cur,
            "exercise_instruction_rewrite",
            preparation=preparation,
            trainer_notes=trainer_notes,
            **base_placeholders,
        )
        models_by_slug["exercise_instruction_rewrite"] = model_instr
        prompt = rendered.text
        if _ai_debug_on():
            _LOGGER.warning(
                "AI_DEBUG exercise_ai instructions prompt_slug=exercise_instruction_rewrite prompt_chars=%s",
                len(prompt),
            )
        sys_hint = (
            "Du antwortest nur mit validem JSON-Objekt (Schluessel goal, execution, preparation, trainer_notes). "
            "Keine Kommentare ausserhalb des JSON."
        )
        body = _openrouter_text_or_raise(
            api_key=key,
            model=model_instr,
            prompt=prompt,
            debug_label="instructions",
            empty_detail="OpenRouter/KI lieferte leeren Inhalt fuer Anleitungs-Ueberarbeitung.",
            system_content=sys_hint,
            temperature=0.2,
        )
        try:
            parsed = _extract_instruction_rewrite_object(body)
        except (json.JSONDecodeError, ValueError) as e:
            if _ai_debug_on():
                _LOGGER.warning(
                    "AI_DEBUG exercise_ai instructions JSON parse_failed err=%s head=%s",
                    e,
                    (body.replace("\r", "").replace("\n", " ").strip())[:400],
                )
            raise HTTPException(
                status_code=502,
                detail="KI lieferte kein verwertbares JSON fuer die Anleitung.",
            ) from e
        originals = {
            "goal": goal,
            "execution": execution,
            "preparation": preparation,
            "trainer_notes": trainer_notes,
        }
        fields = _sanitize_instruction_rewrite_payload(parsed, originals=originals)
        if not any((fields.get(k) or "").strip() for k in _INSTRUCTION_JSON_KEYS):
            raise HTTPException(
                status_code=502,
                detail="KI lieferte leere Anleitungs-Felder.",
            )
        result["instructions"] = {
            "fields": fields,
            "ai_generated": True,
            "model": model_instr,
        }

    result["models_by_slug"] = models_by_slug
    # Top-level "model": skills take precedence, then instructions, then summary.
    if want_skills:
        result["model"] = models_by_slug["exercise_skill_suggestions"]
    elif want_instructions:
        result["model"] = models_by_slug["exercise_instruction_rewrite"]
    elif want_summary:
        result["model"] = models_by_slug["exercise_summary"]
    else:
        result["model"] = default_openrouter_model_id()

    return result


# Public API of this module: the names exported by `from exercise_ai import *`.
# Everything else (underscore-prefixed helpers and constants) is internal.
__all__ = [
    "build_contextual_skills_catalog_block",
    "build_exercise_placeholder_variables",
    "run_exercise_ai_suggestion",
    "strip_html_to_plain",
]