shinkan-jinkendo/backend/exercise_enrichment.py

"""
Superadmin-Werkzeug: Übungs-Anreicherung per KI (Skills + optional Metadaten).

Wiederverwendet run_exercise_form_ai_suggestion / exercise_ai — keine neue OpenRouter-Pipeline.
"""
from __future__ import annotations

from typing import Any, Dict, List, Literal, Optional

from ai_prompt_context import ExerciseFormAiPromptContext
from ai_prompt_job import run_exercise_form_ai_suggestion
from exercise_ai import strip_html_to_plain
from exercise_rich_text import normalize_inline_exercise_media_markup

from routers.exercises import (
    enrich_exercise_detail,
    normalize_exercise_skill_intensity,
    normalize_exercise_skill_level,
)

# Static type for the merge strategy accepted by merge_skills / persist_merged_skills.
SkillMergeMode = Literal["additive", "replace_ai_only", "replace_all"]

# Same three mode names as a runtime set (not referenced in this module;
# presumably used by callers for request validation — TODO confirm).
SKILL_MERGE_MODES = frozenset({"additive", "replace_ai_only", "replace_all"})
# Default for the `set_status` parameter of apply_exercise_enrichment.
DEFAULT_SET_STATUS = "in_review"
# Max. IDs per apply HTTP request (no LLM involved).
MAX_BATCH_EXERCISES = 50
# Preview: only a few exercises per request — otherwise gateway 504 (Fritz!Box or similar, ~60s).
MAX_PREVIEW_BATCH_EXERCISES = 3

# Exercise text fields that the instruction helpers in this module read, diff and write.
_INSTRUCTION_FIELDS = ("goal", "execution", "preparation", "trainer_notes")
# Skill row attributes compared by _skill_meta_differs.
_SKILL_COMPARE_KEYS = ("intensity", "required_level", "target_level", "is_primary")


def _focus_areas_ai_ctx_from_detail(exercise: Dict[str, Any]) -> list[tuple[int, bool]]:
    """Collect ``(focus_area_id, is_primary)`` pairs from an exercise detail dict.

    Entries that are not dicts, whose ``focus_area_id`` cannot be converted
    with ``int()``, or whose id is below 1 are dropped. The result lists
    primary areas first and is ordered by ascending id within each group.
    """
    pairs: list[tuple[int, bool]] = []
    for entry in exercise.get("focus_areas") or []:
        if not isinstance(entry, dict):
            continue
        try:
            area_id = int(entry.get("focus_area_id"))
        except (TypeError, ValueError):
            # Missing or non-numeric id: ignore this entry.
            continue
        if area_id >= 1:
            pairs.append((area_id, bool(entry.get("is_primary"))))
    # `not primary` sorts True-primary rows (False key) ahead of the rest.
    return sorted(pairs, key=lambda pair: (not pair[1], pair[0]))


def _focus_area_hint_from_detail(exercise: Dict[str, Any]) -> str:
    """Join the non-empty focus area names of an exercise into one comma-separated hint.

    Non-dict entries and blank names are skipped. A hint longer than 900
    characters is cut to 899 characters plus a trailing ellipsis.
    """
    entries = (e for e in (exercise.get("focus_areas") or []) if isinstance(e, dict))
    stripped = ((e.get("name") or "").strip() for e in entries)
    hint = ", ".join(label for label in stripped if label).strip()
    return hint if len(hint) <= 900 else hint[:899] + "…"


def build_form_context_from_exercise(exercise: Dict[str, Any]) -> ExerciseFormAiPromptContext:
    """Build the AI prompt context for a stored exercise detail dict.

    The title is stripped, the four instruction fields are passed through
    unchanged, and the focus hint / focus tuples are passed as ``None`` when
    the exercise yields none.
    """
    hint_text = _focus_area_hint_from_detail(exercise)
    focus_pairs = _focus_areas_ai_ctx_from_detail(exercise)
    context_kwargs = {
        "title": str(exercise.get("title") or "").strip(),
        "goal": exercise.get("goal"),
        "execution": exercise.get("execution"),
        "preparation": exercise.get("preparation"),
        "trainer_notes": exercise.get("trainer_notes"),
        "focus_hint": hint_text or None,
        "focus_tuples": focus_pairs or None,
    }
    return ExerciseFormAiPromptContext.from_focus_tuples(**context_kwargs)


def validate_exercise_for_enrichment(
    exercise: Dict[str, Any],
    *,
    want_skills: bool = False,
    want_summary: bool = False,
    want_instructions: bool = False,
) -> Optional[str]:
    """Check whether an exercise has enough content for the requested enrichment modes.

    Returns:
        ``None`` when the exercise can be processed, otherwise a German,
        human-readable reason. Checks run in this order: missing title,
        missing goal/execution text (skills or summary requested), missing
        instruction source text (instructions requested), no mode requested.
    """
    if not str(exercise.get("title") or "").strip():
        return "Titel fehlt"

    form_ctx = build_form_context_from_exercise(exercise)
    goal_text = strip_html_to_plain(exercise.get("goal"))
    execution_text = strip_html_to_plain(exercise.get("execution"))

    # Skills and summary are derived from goal/execution, so one of them must carry text.
    if (want_skills or want_summary) and not (goal_text.strip() or execution_text.strip()):
        return "Mindestens Ziel oder Durchführung muss Inhalt liefern (für Skills/Kurzfassung)"

    if want_instructions and not form_ctx.has_instruction_source_text():
        return "Für Anleitungs-Überarbeitung fehlt Ausgangstext (Titel oder Anleitungsfeld)"

    if not any((want_skills, want_summary, want_instructions)):
        return "Kein Anreicherungsmodus aktiv"

    return None


def _normalize_skill_row(raw: Dict[str, Any], *, ai_suggested: bool) -> Dict[str, Any]:
    """Return a skill row reduced to the keys this module works with.

    ``skill_id`` is coerced with ``int()`` (a missing key raises ``KeyError``),
    a blank name falls back to ``"Skill #<id>"``, intensity and levels go
    through the shared normalizers, and ``ai_suggested`` is taken from the
    argument rather than from ``raw``.
    """
    skill_id = int(raw["skill_id"])
    label = (raw.get("skill_name") or "").strip()
    if not label:
        label = f"Skill #{raw['skill_id']}"
    row: Dict[str, Any] = {
        "skill_id": skill_id,
        "skill_name": label,
        "skill_category": raw.get("skill_category"),
        "is_primary": bool(raw.get("is_primary")),
    }
    row["intensity"] = normalize_exercise_skill_intensity(raw.get("intensity"))
    row["required_level"] = normalize_exercise_skill_level(raw.get("required_level"))
    row["target_level"] = normalize_exercise_skill_level(raw.get("target_level"))
    row["ai_suggested"] = ai_suggested
    return row


def _skill_meta_differs(a: Dict[str, Any], b: Dict[str, Any]) -> bool:
    """Tell whether two skill rows differ in any of the compared attributes.

    Both sides are normalized per attribute before comparison, so raw and
    already-normalized rows can be compared with each other.
    """
    coerce_by_key = {
        "required_level": normalize_exercise_skill_level,
        "target_level": normalize_exercise_skill_level,
        "intensity": normalize_exercise_skill_intensity,
        "is_primary": bool,
    }
    for field in _SKILL_COMPARE_KEYS:
        left, right = a.get(field), b.get(field)
        coerce = coerce_by_key.get(field)
        if coerce is not None:
            left, right = coerce(left), coerce(right)
        if left != right:
            return True
    return False


def merge_skills(
    existing: List[Dict[str, Any]],
    suggested: List[Dict[str, Any]],
    mode: SkillMergeMode,
) -> List[Dict[str, Any]]:
    """Combine stored skill rows with AI suggestions according to ``mode``.

    All suggested rows are marked ``ai_suggested=True``.

    * ``replace_all``: only the suggestions are returned.
    * ``replace_ai_only``: manual rows are kept; suggestions are appended
      unless a manual row already uses the same skill id.
    * any other value (``additive``): every existing row is kept; an existing
      AI row is overwritten by a suggestion with the same id, a manual row is
      left untouched; suggestions with new ids are appended.
    """
    current_rows = [
        _normalize_skill_row(row, ai_suggested=bool(row.get("ai_suggested"))) for row in existing
    ]
    proposed_rows = [_normalize_skill_row(row, ai_suggested=True) for row in suggested]

    if mode == "replace_all":
        return list(proposed_rows)

    if mode == "replace_ai_only":
        manual_rows = [row for row in current_rows if not row.get("ai_suggested")]
        manual_ids = {int(row["skill_id"]) for row in manual_rows}
        return manual_rows + [
            row for row in proposed_rows if int(row["skill_id"]) not in manual_ids
        ]

    # additive
    proposed_by_id = {int(row["skill_id"]): row for row in proposed_rows}
    combined: List[Dict[str, Any]] = []
    known_ids: set[int] = set()
    for row in current_rows:
        sid = int(row["skill_id"])
        known_ids.add(sid)
        proposal = proposed_by_id.get(sid)
        if proposal is not None and row.get("ai_suggested"):
            # Earlier AI row: refresh it with the new suggestion.
            combined.append({**row, **proposal, "ai_suggested": True})
        else:
            combined.append(dict(row))
    for row in proposed_rows:
        sid = int(row["skill_id"])
        if sid in known_ids:
            continue
        known_ids.add(sid)
        combined.append(row)
    return combined


def compute_skill_diff(
    before: List[Dict[str, Any]],
    after: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """Classify skill rows as added, removed, changed or kept between two lists.

    Rows are matched by ``int(row["skill_id"])``; if an id occurs more than
    once in a list, the last row with that id wins.

    Returns:
        A dict with four lists:
        ``added`` (rows only in ``after``), ``removed`` (rows only in
        ``before``) and ``kept`` (rows from ``before`` present in both lists
        without attribute differences) are ordered by ascending skill id;
        ``changed`` holds ``{"skill_id", "skill_name", "before", "after"}``
        entries in the order the ids appear in ``before``.
    """
    before_by_id = {int(row["skill_id"]): row for row in before}
    after_by_id = {int(row["skill_id"]): row for row in after}

    added = [after_by_id[sid] for sid in sorted(after_by_id) if sid not in before_by_id]
    removed = [before_by_id[sid] for sid in sorted(before_by_id) if sid not in after_by_id]

    changed: List[Dict[str, Any]] = []
    # Collected once so the `kept` filter below is a constant-time lookup
    # instead of rebuilding the id set for every row.
    changed_ids: set = set()
    for sid, old_row in before_by_id.items():
        if sid not in after_by_id:
            continue
        new_row = after_by_id[sid]
        if _skill_meta_differs(old_row, new_row):
            changed_ids.add(sid)
            changed.append(
                {
                    "skill_id": sid,
                    "skill_name": new_row.get("skill_name") or old_row.get("skill_name"),
                    "before": old_row,
                    "after": new_row,
                }
            )

    kept = [
        before_by_id[sid]
        for sid in sorted(before_by_id)
        if sid in after_by_id and sid not in changed_ids
    ]
    return {"added": added, "removed": removed, "changed": changed, "kept": kept}


def _skills_from_ai_payload(payload: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Extract normalized, AI-flagged skill rows from the ``skills`` list of an AI payload.

    Returns an empty list when ``skills`` is not a list; entries that are not
    dicts or have a falsy ``skill_id`` are ignored.
    """
    raw_rows = payload.get("skills")
    if not isinstance(raw_rows, list):
        return []
    normalized: List[Dict[str, Any]] = []
    for entry in raw_rows:
        if not isinstance(entry, dict) or not entry.get("skill_id"):
            continue
        normalized.append(_normalize_skill_row(entry, ai_suggested=True))
    return normalized


def _summary_from_ai_payload(payload: Dict[str, Any]) -> Optional[str]:
    """Read the suggested summary from an AI payload.

    ``payload["summary"]`` may be a dict with a ``text`` entry or a plain
    string. Returns the stripped text, or ``None`` when it is blank or the
    entry has any other type.
    """
    summary = payload.get("summary")
    if isinstance(summary, dict):
        candidate = summary.get("text") or ""
    elif isinstance(summary, str):
        candidate = summary
    else:
        return None
    return candidate.strip() or None


def _instructions_from_ai_payload(payload: Dict[str, Any]) -> Dict[str, str]:
    """Extract the suggested instruction texts from ``payload["instructions"]["fields"]``.

    Only the known instruction fields are considered; values are converted
    with ``str()`` and stripped, and blank or ``None`` values are left out.
    Returns an empty dict when the expected nesting is missing.
    """
    section = payload.get("instructions")
    fields = section.get("fields") if isinstance(section, dict) else None
    if not isinstance(fields, dict):
        return {}
    cleaned: Dict[str, str] = {}
    for name in _INSTRUCTION_FIELDS:
        value = fields.get(name)
        if value is None:
            continue
        text = str(value).strip()
        if text:
            cleaned[name] = text
    return cleaned


def _instruction_snapshot(exercise: Dict[str, Any]) -> Dict[str, str]:
    """Return the exercise's non-empty instruction fields as plain text.

    Each field is converted with ``strip_html_to_plain(..., max_len=400)``;
    fields that are falsy or blank after conversion are omitted.
    """
    snapshot: Dict[str, str] = {}
    for field in _INSTRUCTION_FIELDS:
        value = exercise.get(field)
        if not value:
            continue
        text = strip_html_to_plain(value, max_len=400).strip()
        if text:
            snapshot[field] = text
    return snapshot


def compute_instruction_diff(
    before: Dict[str, str],
    after: Dict[str, str],
) -> Dict[str, Any]:
    """Compare current instruction texts with suggested ones, field by field.

    Fields without a suggestion are ignored. A field with a suggestion but no
    current text is reported in ``added_fields``. A field is reported in
    ``changed_fields`` when the current text equals neither the suggestion
    nor its plain-text form.

    Returns:
        ``{"changed_fields": [{"field", "before_plain", "after_html"}, ...],
        "added_fields": [field, ...]}``.
    """
    changed_fields: List[Dict[str, Any]] = []
    added_fields: List[str] = []
    for field in _INSTRUCTION_FIELDS:
        old_text = (before.get(field) or "").strip()
        new_text = (after.get(field) or "").strip()
        if not new_text:
            continue
        if not old_text:
            added_fields.append(field)
            continue
        new_plain = strip_html_to_plain(new_text, max_len=400).strip()
        if old_text == new_plain or old_text == new_text:
            continue
        changed_fields.append({"field": field, "before_plain": old_text, "after_html": new_text})
    return {"changed_fields": changed_fields, "added_fields": added_fields}


def preview_exercise_enrichment(
    cur,
    exercise_id: int,
    *,
    want_skills: bool = True,
    want_summary: bool = False,
    want_instructions: bool = False,
    merge_mode: SkillMergeMode = "additive",
) -> Dict[str, Any]:
    """Request AI suggestions for one exercise and describe the resulting changes.

    This function itself issues no SQL writes; it loads the exercise, runs the
    shared AI suggestion job and computes the merged skills and diffs.

    Args:
        cur: Database cursor handed to the loader and the AI job.
        exercise_id: Id of the exercise to preview.
        want_skills: Request skill suggestions and compute merge + diff.
        want_summary: Request a summary suggestion.
        want_instructions: Request rewritten instruction fields.
        merge_mode: Strategy passed to ``merge_skills``.

    Returns:
        ``{"ok": False, "error": ...}`` when the exercise is missing, the same
        with ``"skipped": True`` when validation rejects it, otherwise a dict
        with existing/suggested/merged skills, diffs, summary and instruction
        suggestions and ``ai_meta``.
    """
    detail = enrich_exercise_detail(exercise_id, cur)
    if not detail:
        return {"exercise_id": exercise_id, "ok": False, "error": "Übung nicht gefunden"}

    problem = validate_exercise_for_enrichment(
        detail,
        want_skills=want_skills,
        want_summary=want_summary,
        want_instructions=want_instructions,
    )
    if problem:
        return {
            "exercise_id": exercise_id,
            "ok": False,
            "skipped": True,
            "error": problem,
            "title": detail.get("title"),
            "status": detail.get("status"),
        }

    current_skills = detail.get("skills") or []
    proposed_skills: List[Dict[str, Any]] = []
    proposed_summary: Optional[str] = None
    proposed_instructions: Dict[str, str] = {}
    ai_meta: Dict[str, Any] = {}

    if want_skills or want_summary or want_instructions:
        payload = run_exercise_form_ai_suggestion(
            cur,
            build_form_context_from_exercise(detail),
            want_summary=want_summary,
            want_skills=want_skills,
            want_instructions=want_instructions,
        )
        if want_skills:
            proposed_skills = _skills_from_ai_payload(payload)
        if want_summary:
            proposed_summary = _summary_from_ai_payload(payload)
        if want_instructions:
            proposed_instructions = _instructions_from_ai_payload(payload)
        ai_meta = {
            "models": payload.get("models_by_slug") or {},
            # Number of enabled modes.
            "llm_calls": sum([want_skills, want_summary, want_instructions]),
        }

    if want_skills:
        merged = merge_skills(current_skills, proposed_skills, merge_mode)
        skill_diff = compute_skill_diff(current_skills, merged)
    else:
        merged = list(current_skills)
        skill_diff = None

    current_summary = (detail.get("summary") or "").strip() or None
    instructions_before = _instruction_snapshot(detail)
    if want_instructions:
        # The diff is computed on plain-text forms of the suggestions.
        plain_after = {
            name: strip_html_to_plain(html, max_len=400)
            for name, html in proposed_instructions.items()
        }
        instruction_diff = compute_instruction_diff(instructions_before, plain_after)
    else:
        instruction_diff = None

    return {
        "exercise_id": exercise_id,
        "ok": True,
        "title": detail.get("title"),
        "status": detail.get("status"),
        "visibility": detail.get("visibility"),
        "primary_focus_name": _primary_focus_from_exercise(detail),
        "existing_skills": current_skills,
        "suggested_skills": proposed_skills,
        "merged_skills": merged,
        "diff": skill_diff,
        "existing_summary": current_summary,
        "suggested_summary": proposed_summary,
        "existing_instructions": instructions_before,
        "suggested_instructions": proposed_instructions,
        "instruction_diff": instruction_diff,
        "ai_meta": ai_meta,
    }


def _primary_focus_from_exercise(exercise: Dict[str, Any]) -> Optional[str]:
    """Return the display name of the exercise's primary focus area.

    The first entry flagged ``is_primary`` decides the result: its stripped
    name, or ``None`` when that name is blank. Without any primary entry the
    first non-blank name is returned, or ``None`` when there is none.
    """
    rows = [row for row in (exercise.get("focus_areas") or []) if isinstance(row, dict)]
    primary = next((row for row in rows if row.get("is_primary")), None)
    if primary is not None:
        return (primary.get("name") or "").strip() or None
    names = ((row.get("name") or "").strip() for row in rows)
    return next((name for name in names if name), None)


def persist_merged_skills(cur, exercise_id: int, merged: List[Dict[str, Any]], merge_mode: SkillMergeMode) -> None:
    """Write the merged skill rows of one exercise to ``exercise_skills``.

    Depending on ``merge_mode`` rows are deleted first: ``replace_all`` removes
    every row of the exercise, ``replace_ai_only`` removes only rows with
    ``ai_suggested = true``; any other mode deletes nothing. Afterwards each
    row in ``merged`` is upserted on ``(exercise_id, skill_id)``.

    On conflict, a stored row with ``ai_suggested = false`` keeps all its
    values when ``merge_mode`` is ``'additive'``; in every other case the
    incoming values overwrite the stored ones.

    Args:
        cur: Database cursor used for the DELETE/INSERT statements. No commit
            is issued here.
        exercise_id: Exercise whose skills are written.
        merged: Skill rows to store; ``skill_id`` is required per row.
        merge_mode: Merge strategy, also passed into the SQL CASE expressions.
    """
    if merge_mode == "replace_all":
        cur.execute("DELETE FROM exercise_skills WHERE exercise_id = %s", (exercise_id,))
    elif merge_mode == "replace_ai_only":
        cur.execute(
            "DELETE FROM exercise_skills WHERE exercise_id = %s AND ai_suggested = true",
            (exercise_id,),
        )

    for sk in merged:
        cur.execute(
            """
            INSERT INTO exercise_skills
                (exercise_id, skill_id, is_primary, intensity, required_level, target_level, ai_suggested)
            VALUES (%s, %s, %s, %s, %s, %s, %s)
            ON CONFLICT (exercise_id, skill_id) DO UPDATE SET
                intensity = CASE
                    WHEN exercise_skills.ai_suggested = false AND %s = 'additive'
                    THEN exercise_skills.intensity ELSE EXCLUDED.intensity END,
                required_level = CASE
                    WHEN exercise_skills.ai_suggested = false AND %s = 'additive'
                    THEN exercise_skills.required_level ELSE EXCLUDED.required_level END,
                target_level = CASE
                    WHEN exercise_skills.ai_suggested = false AND %s = 'additive'
                    THEN exercise_skills.target_level ELSE EXCLUDED.target_level END,
                is_primary = CASE
                    WHEN exercise_skills.ai_suggested = false AND %s = 'additive'
                    THEN exercise_skills.is_primary ELSE EXCLUDED.is_primary END,
                ai_suggested = CASE
                    WHEN exercise_skills.ai_suggested = false AND %s = 'additive'
                    THEN exercise_skills.ai_suggested ELSE EXCLUDED.ai_suggested END
            """,
            (
                # First seven values fill the VALUES list in column order.
                exercise_id,
                int(sk["skill_id"]),
                bool(sk.get("is_primary")),
                normalize_exercise_skill_intensity(sk.get("intensity")),
                normalize_exercise_skill_level(sk.get("required_level")),
                normalize_exercise_skill_level(sk.get("target_level")),
                bool(sk.get("ai_suggested")),
                # One merge_mode value per CASE expression (positional placeholders).
                merge_mode,
                merge_mode,
                merge_mode,
                merge_mode,
                merge_mode,
            ),
        )


def _normalize_instruction_fields(fields: Optional[Dict[str, Any]]) -> Dict[str, str]:
    """Reduce a client-supplied field dict to the writable instruction fields.

    Only known instruction fields with non-blank content are kept; each value
    is converted with ``str()``, stripped and passed through
    ``normalize_inline_exercise_media_markup``. Falsy input yields ``{}``.
    """
    if not fields:
        return {}
    cleaned: Dict[str, str] = {}
    for name in _INSTRUCTION_FIELDS:
        value = fields.get(name)
        # Absent and explicit-None entries are both skipped.
        text = "" if value is None else str(value).strip()
        if text:
            cleaned[name] = normalize_inline_exercise_media_markup(text)
    return cleaned


def apply_exercise_enrichment(
    cur,
    exercise_id: int,
    *,
    merged_skills: Optional[List[Dict[str, Any]]] = None,
    merge_mode: SkillMergeMode = "additive",
    set_status: Optional[str] = DEFAULT_SET_STATUS,
    apply_skills: bool = False,
    summary_text: Optional[str] = None,
    apply_summary: bool = False,
    instruction_fields: Optional[Dict[str, Any]] = None,
    apply_instructions: bool = False,
) -> Dict[str, Any]:
    """Write enrichment results (skills, summary, instructions, status) for one exercise.

    All input checks run before the first write, so a rejected request never
    leaves the skill rows modified while reporting ``ok: False``.

    Args:
        cur: Database cursor; no commit is issued here.
        exercise_id: Exercise to update.
        merged_skills: Skill rows to persist when ``apply_skills`` is set.
        merge_mode: Strategy passed to ``persist_merged_skills``.
        set_status: Target status; ``None``/blank leaves the status untouched.
            ``approved`` is refused; only ``draft``, ``in_review`` and
            ``archived`` are accepted.
        apply_skills: Persist ``merged_skills``. An empty list is only allowed
            with ``merge_mode == "replace_all"``.
        summary_text: New summary; stored truncated to 220 characters.
        apply_summary: Write ``summary_text`` if it is non-blank.
        instruction_fields: New instruction texts keyed by field name.
        apply_instructions: Write the normalized ``instruction_fields``.

    Returns:
        ``{"exercise_id", "ok": False, "error": ...}`` on rejection (with
        ``"skipped": True`` when validation fails), otherwise a dict with
        ``ok: True``, the resulting ``status`` and what was applied.
    """
    exercise = enrich_exercise_detail(exercise_id, cur)
    if not exercise:
        return {"exercise_id": exercise_id, "ok": False, "error": "Übung nicht gefunden"}

    skip_reason = validate_exercise_for_enrichment(
        exercise,
        want_skills=apply_skills,
        want_summary=apply_summary,
        want_instructions=apply_instructions,
    )
    if skip_reason:
        return {
            "exercise_id": exercise_id,
            "ok": False,
            "skipped": True,
            "error": skip_reason,
        }

    skills_list = merged_skills or []
    if apply_skills and not skills_list and merge_mode != "replace_all":
        return {
            "exercise_id": exercise_id,
            "ok": False,
            "error": "Keine Skills zum Anwenden",
        }

    # Validate the target status before touching the database; previously the
    # skills were already written when an invalid status was rejected.
    new_status = (set_status or "").strip().lower() or None
    if new_status == "approved":
        return {
            "exercise_id": exercise_id,
            "ok": False,
            "error": "Automatisches Freigeben (approved) ist nicht erlaubt",
        }
    if new_status and new_status not in ("draft", "in_review", "archived"):
        return {"exercise_id": exercise_id, "ok": False, "error": "Ungültiger Ziel-Status"}

    sets: List[str] = []
    vals: List[Any] = []

    summary_applied = False
    if apply_summary and summary_text is not None:
        text = str(summary_text).strip()
        if text:
            sets.extend(["summary = %s", "summary_ai_generated = true"])
            vals.append(text[:220])
            summary_applied = True

    # Normalized once and reused for both the UPDATE and the result flag.
    norm_instructions = _normalize_instruction_fields(instruction_fields) if apply_instructions else {}
    for key, val in norm_instructions.items():
        # Keys come from the fixed _INSTRUCTION_FIELDS whitelist, so
        # interpolating the column name is safe.
        sets.append(f"{key} = %s")
        vals.append(val)

    if new_status:
        sets.append("status = %s")
        vals.append(new_status)

    if not sets and not apply_skills:
        return {"exercise_id": exercise_id, "ok": False, "error": "Nichts anzuwenden"}

    # --- writes start here ---
    if apply_skills:
        persist_merged_skills(cur, exercise_id, skills_list, merge_mode)

    if sets:
        sets.append("updated_at = NOW()")
        vals.append(exercise_id)
        cur.execute(
            f"UPDATE exercises SET {', '.join(sets)} WHERE id = %s",
            tuple(vals),
        )

    return {
        "exercise_id": exercise_id,
        "ok": True,
        "status": new_status or exercise.get("status"),
        "skills_applied": len(skills_list) if apply_skills else 0,
        "summary_applied": summary_applied,
        "instructions_applied": bool(norm_instructions),
    }


def estimate_llm_calls(
    *,
    exercise_count: int,
    want_skills: bool,
    want_summary: bool,
    want_instructions: bool = False,
) -> Dict[str, Any]:
    """Estimate the number of LLM calls for a batch: one call per exercise and enabled mode.

    Returns:
        ``total`` (all calls), ``per_exercise`` (number of enabled modes) and
        the per-mode counts ``skills``, ``summary`` and ``instructions``.
    """
    enabled_by_mode = {
        "skills": want_skills,
        "summary": want_summary,
        "instructions": want_instructions,
    }
    calls = {
        mode: (exercise_count if enabled else 0) for mode, enabled in enabled_by_mode.items()
    }
    return {
        "total": calls["skills"] + calls["summary"] + calls["instructions"],
        "per_exercise": sum([want_skills, want_summary, want_instructions]),
        **calls,
    }