# shinkan-jinkendo/backend/skill_scoring.py

"""
Gewichtetes Fähigkeiten-Scoring aus Übungsvorkommen (Phase 3, regelbasiert).

Aggregiert exercise_skills über alle Übungen eines Artefakts mit Gewichten aus:
geplanter Dauer, Vorkommen, Intensität (Nutzeneinschätzung) und Stufen-Spanne (von/bis).

is_primary wird bewusst nicht genutzt (perspektivabhängig). development_contribution ist
in der UI nicht gepflegt und wird ignoriert.
"""
from __future__ import annotations

from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple


# Fallback duration in minutes; used as the default of _item_base_minutes when an
# occurrence has no usable planned duration.
DEFAULT_ITEM_MINUTES = 8
# NOTE(review): not referenced in this file — presumably the fallback that callers pass
# for progression-graph occurrences, which carry no planned duration; confirm.
GRAPH_DEFAULT_ITEM_MINUTES = 10

# Weight multiplier per intensity label (German and English spellings).
# _skill_link_multiplier looks labels up stripped and lower-cased; unknown labels count as 1.0.
_INTENSITY_MULT = {
    "niedrig": 0.85,
    "low": 0.85,
    "mittel": 1.0,
    "medium": 1.0,
    "hoch": 1.2,
    "high": 1.2,
}

# Keep in sync with backend/routers/exercises.py _EXERCISE_SKILL_LEVEL_RANK_SQL / skillLevels.js
# Maps a lower-cased level label (or its numeric string form) to a rank from 1 to 5.
_LEVEL_RANK = {
    "basis": 1,
    "grundlagen": 2,
    "aufbau": 3,
    "fortgeschritten": 4,
    "optimierung": 5,
    "einsteiger": 1,
    "experte": 5,
    "1": 1,
    "2": 2,
    "3": 3,
    "4": 4,
    "5": 5,
}


def _level_rank(value: Optional[str]) -> Optional[int]:
    """
    Translate a level label into its numeric rank.

    The label is stringified, stripped and lower-cased before the lookup.
    Returns None for a missing, blank or unknown label.
    """
    normalized = "" if value is None else str(value).strip().lower()
    if normalized == "":
        return None
    # dict.get already yields None for labels that are not in the table.
    return _LEVEL_RANK.get(normalized)


def _level_range_multiplier(
    required_level: Optional[str] = None,
    target_level: Optional[str] = None,
) -> float:
    """
    Weight factor for the level span (from/to) of a skill link.

    Wider spans and higher levels yield a slightly larger factor. When only one
    of the two levels resolves to a rank, it is used for both ends. When neither
    resolves, the factor is neutral (1.0).
    """
    known_ranks = [
        rank
        for rank in (_level_rank(required_level), _level_rank(target_level))
        if rank is not None
    ]
    if not known_ranks:
        return 1.0

    # min/max also covers a reversed range (required above target).
    lowest = min(known_ranks)
    highest = max(known_ranks)

    width = highest - lowest + 1
    width = max(1, min(5, width))
    centre = (lowest + highest) / 2.0

    width_factor = 0.92 + 0.04 * width
    depth_factor = 0.95 + 0.025 * centre
    return width_factor * depth_factor


@dataclass(frozen=True)
class ExerciseOccurrence:
    """A single occurrence of an exercise inside an artifact (immutable)."""

    # Id of the exercise that occurs.
    exercise_id: int
    # Planned duration in minutes; None when no duration is available.
    planned_duration_min: Optional[int] = None
    # Optional label for UI (e.g. slot title).
    context_label: Optional[str] = None


def _item_base_minutes(planned: Optional[int], default: int = DEFAULT_ITEM_MINUTES) -> float:
    """
    Return the minutes to count for one occurrence.

    Uses ``planned`` when it converts to a positive integer; otherwise falls
    back to ``default``. The result is always a float.
    """
    if planned is None:
        return float(default)
    try:
        minutes = int(planned)
    except (TypeError, ValueError):
        # Not convertible to an integer: treat it like a missing duration.
        return float(default)
    if minutes > 0:
        return float(minutes)
    return float(default)


def _skill_link_multiplier(
    *,
    intensity: Optional[str] = None,
    required_level: Optional[str] = None,
    target_level: Optional[str] = None,
) -> float:
    """
    Combined weight factor of one exercise-skill link.

    Product of the intensity factor (1.0 for a missing or unknown label) and
    the level-range factor for ``required_level`` / ``target_level``.
    """
    intensity_factor = 1.0
    if intensity:
        label = str(intensity).strip().lower()
        intensity_factor = _INTENSITY_MULT.get(label, 1.0)
    return intensity_factor * _level_range_multiplier(required_level, target_level)


def _round2(val: float) -> float:
    return round(val, 2)


def compute_skill_profile(
    occurrences: Sequence[ExerciseOccurrence],
    skill_rows_by_exercise: Dict[int, List[Dict[str, Any]]],
    *,
    default_item_minutes: int = DEFAULT_ITEM_MINUTES,
) -> Dict[str, Any]:
    """
    Build a normalized skill profile from exercise occurrences and exercise_skills rows.

    Every exercise contributes its summed minutes (planned duration, or
    ``default_item_minutes`` for occurrences without a usable one) to each linked
    skill, scaled by that link's intensity / level-range multiplier.

    Args:
        occurrences: Exercise occurrences of the artifact; ``None`` or an empty
            sequence yields an empty profile.
        skill_rows_by_exercise: Skill link rows per exercise id. The keys read here
            are ``skill_id``, ``intensity``, ``required_level``, ``target_level``,
            ``skill_name``, ``category``, ``focus_areas`` and ``exercise_title``.
        default_item_minutes: Minutes assumed for an occurrence without a positive
            planned duration.

    Returns:
        A dict with ``computed_at`` (UTC ISO timestamp), ``scoring_version``,
        ``total_weight``, the occurrence / exercise counters, ``skills`` (sorted by
        descending weight, each with at most eight ``top_exercises``) and
        ``by_category``. Weights and percentages are rounded to two decimals, and
        every percentage is relative to ``total_weight``. Rounding happens only on
        output; all sums and shares are derived from the unrounded weights.
    """
    exercise_meta: Dict[int, Dict[str, Any]] = defaultdict(
        lambda: {"occurrence_count": 0, "minutes": 0.0, "context_labels": []}
    )
    total_occurrences = 0
    for occ in occurrences or []:
        meta = exercise_meta[int(occ.exercise_id)]
        meta["occurrence_count"] += 1
        meta["minutes"] += _item_base_minutes(occ.planned_duration_min, default_item_minutes)
        total_occurrences += 1
        # Labels are collected per exercise but are not part of the returned profile.
        if occ.context_label and occ.context_label not in meta["context_labels"]:
            meta["context_labels"].append(occ.context_label)

    skill_acc: Dict[int, Dict[str, Any]] = {}
    total_weight = 0.0
    exercises_with_skills: set[int] = set()

    for eid, meta in exercise_meta.items():
        links = skill_rows_by_exercise.get(eid) or []
        if not links:
            continue
        exercises_with_skills.add(eid)
        # Every entry of exercise_meta was created by at least one occurrence,
        # so the count is >= 1 and "minutes" is the sum over all occurrences.
        occ_count = meta["occurrence_count"]
        exercise_minutes = meta["minutes"]

        for link in links:
            sid = link.get("skill_id")
            if sid is None:
                continue
            sid = int(sid)
            contribution = exercise_minutes * _skill_link_multiplier(
                intensity=link.get("intensity"),
                required_level=link.get("required_level"),
                target_level=link.get("target_level"),
            )
            if contribution <= 0:
                continue

            acc = skill_acc.get(sid)
            if acc is None:
                acc = skill_acc[sid] = {
                    "skill_id": sid,
                    "skill_name": link.get("skill_name") or f"Fähigkeit #{sid}",
                    "category": link.get("category"),
                    "focus_areas": link.get("focus_areas"),
                    "weight": 0.0,
                    "occurrence_count": 0,
                    "exercises": {},
                }
            acc["weight"] += contribution

            ex_entry = acc["exercises"].get(eid)
            if ex_entry is None:
                ex_entry = acc["exercises"][eid] = {
                    "exercise_id": eid,
                    "title": link.get("exercise_title") or f"Übung #{eid}",
                    "weight": 0.0,
                    "occurrence_count": occ_count,
                }
                # Count an exercise's occurrences once per skill, even if the
                # exercise links the same skill through several rows.
                acc["occurrence_count"] += occ_count
            ex_entry["weight"] += contribution
            total_weight += contribution

    def _share_percent(weight: float) -> float:
        # Share of the overall weight in percent; 0.0 for an empty profile.
        if total_weight > 0:
            return _round2(weight / total_weight * 100.0)
        return 0.0

    skills_out: List[Dict[str, Any]] = []
    for sid, acc in skill_acc.items():
        ranked = sorted(
            acc["exercises"].values(),
            key=lambda x: (-x["weight"], x.get("title") or ""),
        )[:8]
        # Build fresh output dicts so the share is taken from the unrounded weight.
        top_exercises = [
            {
                "exercise_id": ex["exercise_id"],
                "title": ex["title"],
                "weight": _round2(ex["weight"]),
                "occurrence_count": ex["occurrence_count"],
                "share_percent": _share_percent(ex["weight"]),
            }
            for ex in ranked
        ]
        skills_out.append(
            {
                "skill_id": sid,
                "skill_name": acc["skill_name"],
                "category": acc.get("category"),
                "focus_areas": acc.get("focus_areas"),
                "weight": _round2(acc["weight"]),
                "share_percent": _share_percent(acc["weight"]),
                "occurrence_count": acc["occurrence_count"],
                "top_exercises": top_exercises,
            }
        )
    skills_out.sort(key=lambda x: (-x["weight"], x.get("skill_name") or ""))

    # Sum the unrounded skill weights per category; rounding each skill first
    # would let the rounding error accumulate in the category totals.
    by_category: Dict[str, float] = defaultdict(float)
    for acc in skill_acc.values():
        cat = (acc.get("category") or "").strip() or "—"
        by_category[cat] += acc["weight"]
    category_rows = [
        {"category": cat, "weight": _round2(w), "share_percent": _share_percent(w)}
        for cat, w in sorted(by_category.items(), key=lambda x: (-x[1], x[0]))
    ]

    return {
        "computed_at": datetime.now(timezone.utc).isoformat(),
        "scoring_version": "1.1",
        "total_weight": _round2(total_weight),
        "exercise_occurrence_count": total_occurrences,
        "distinct_exercise_count": len(exercise_meta),
        "exercises_with_skills_count": len(exercises_with_skills),
        "skills": skills_out,
        "by_category": category_rows,
    }


def fetch_exercise_skills_bulk(
    cur, exercise_ids: Iterable[int]
) -> Dict[int, List[Dict[str, Any]]]:
    """
    Load the skill links of several exercises with a single query.

    Args:
        cur: Database cursor. ``execute`` is called with ``%s`` placeholders and
            each fetched row is converted with ``dict(row)``, so rows must be
            mapping-like.
        exercise_ids: Exercise ids to load; falsy entries (``None``, ``0``) are
            dropped and duplicates are removed.

    Returns:
        A plain dict mapping exercise id to its link rows. ``focus_areas`` is
        always a list in the result: a JSON string is decoded, and anything that
        is not (or does not decode to) a list becomes ``[]``. No query is issued
        and ``{}`` is returned when no usable id is given.
    """
    # Imported once per call; the module does not import json at file level.
    import json

    ids = sorted({int(x) for x in exercise_ids if x})
    if not ids:
        return {}
    # Only "%s" placeholders are interpolated into the SQL text; the ids
    # themselves are passed as query parameters.
    ph = ",".join(["%s"] * len(ids))
    cur.execute(
        f"""
        SELECT es.exercise_id, es.skill_id, es.is_primary, es.intensity,
               es.development_contribution, es.required_level, es.target_level,
               s.name AS skill_name, s.category, s.focus_areas,
               e.title AS exercise_title
        FROM exercise_skills es
        JOIN skills s ON s.id = es.skill_id
        JOIN exercises e ON e.id = es.exercise_id
        WHERE es.exercise_id IN ({ph})
          AND (s.status = 'active' OR s.status IS NULL)
        ORDER BY es.exercise_id, s.name, es.skill_id
        """,
        ids,
    )
    out: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
    for row in cur.fetchall():
        d = dict(row)
        eid = int(d["exercise_id"])
        fa = d.get("focus_areas")
        if isinstance(fa, str):
            try:
                fa = json.loads(fa)
            except ValueError:
                # json.JSONDecodeError is a ValueError: malformed JSON text
                # is treated as "no focus areas".
                fa = []
        d["focus_areas"] = fa if isinstance(fa, list) else []
        out[eid].append(d)
    return dict(out)


def collect_unit_exercise_occurrences(cur, unit_id: int) -> List[ExerciseOccurrence]:
    """
    Collect the exercise items of a training unit as occurrences.

    Reads training_unit_section_items joined to training_unit_sections for the
    given unit, keeping only items of type 'exercise' that have an exercise id,
    ordered by section order and then item order. Rows must support key access
    and ``.get``.
    """
    query = """
        SELECT tusi.exercise_id, tusi.planned_duration_min
        FROM training_unit_section_items tusi
        INNER JOIN training_unit_sections tus ON tus.id = tusi.section_id
        WHERE tus.training_unit_id = %s
          AND tusi.item_type = 'exercise'
          AND tusi.exercise_id IS NOT NULL
        ORDER BY tus.order_index, tusi.order_index
        """
    cur.execute(query, (int(unit_id),))

    found: List[ExerciseOccurrence] = []
    for record in cur.fetchall():
        found.append(
            ExerciseOccurrence(
                exercise_id=int(record["exercise_id"]),
                planned_duration_min=record.get("planned_duration_min"),
            )
        )
    return found


def collect_module_exercise_occurrences(cur, module_id: int) -> List[ExerciseOccurrence]:
    """
    Collect the exercise items of a training module as occurrences.

    Reads training_module_items of the given module, keeping only items of type
    'exercise' that have an exercise id, ordered by order_index. Rows must
    support key access and ``.get``.
    """
    params = (int(module_id),)
    cur.execute(
        """
        SELECT exercise_id, planned_duration_min
        FROM training_module_items
        WHERE module_id = %s
          AND item_type = 'exercise'
          AND exercise_id IS NOT NULL
        ORDER BY order_index
        """,
        params,
    )
    items = cur.fetchall()
    return [
        ExerciseOccurrence(int(item["exercise_id"]), item.get("planned_duration_min"))
        for item in items
    ]


def collect_progression_graph_exercise_occurrences(cur, graph_id: int) -> List[ExerciseOccurrence]:
    """
    Collect one occurrence per edge endpoint of a progression graph.

    Every appearance of an exercise as the from- or to-endpoint of an edge
    counts (UNION ALL keeps duplicates). Occurrences carry no planned duration
    and no context label, so scoring falls back to its default minutes.
    """
    # NOTE(review): unlike the unit/module collectors, this query does not filter
    # NULL ids; presumably both edge columns are NOT NULL — confirm against the schema.
    gid = int(graph_id)
    cur.execute(
        """
        SELECT from_exercise_id AS exercise_id FROM exercise_progression_edges WHERE graph_id = %s
        UNION ALL
        SELECT to_exercise_id AS exercise_id FROM exercise_progression_edges WHERE graph_id = %s
        """,
        (gid, gid),
    )
    endpoints: List[ExerciseOccurrence] = []
    for edge_row in cur.fetchall():
        endpoints.append(ExerciseOccurrence(exercise_id=int(edge_row["exercise_id"])))
    return endpoints


def profile_for_occurrences(
    cur,
    occurrences: Sequence[ExerciseOccurrence],
    *,
    default_item_minutes: int = DEFAULT_ITEM_MINUTES,
) -> Dict[str, Any]:
    """
    Compute the skill profile for a list of occurrences.

    Loads the skill links of all referenced exercises through ``cur`` and
    passes them, together with the occurrences, to compute_skill_profile.
    """
    referenced_ids = [occurrence.exercise_id for occurrence in occurrences]
    links_by_exercise = fetch_exercise_skills_bulk(cur, referenced_ids)
    profile = compute_skill_profile(
        occurrences,
        links_by_exercise,
        default_item_minutes=default_item_minutes,
    )
    return profile


def match_score_for_skill_ids(profile: Dict[str, Any], skill_ids: Sequence[int]) -> Dict[str, Any]:
    """
    Overlap of a profile with a set of desired skills (used for suggestions).

    Returns the summed weight of the profile skills whose id is requested, that
    weight as a percentage of the profile's ``total_weight``, and the matched
    skill entries with their ids. ``None`` entries in ``skill_ids`` are ignored;
    with no requested ids the result is all zeros / empty lists.
    """
    requested = {int(raw) for raw in skill_ids if raw is not None}
    if not requested:
        return {
            "match_weight": 0.0,
            "match_percent": 0.0,
            "matched_skill_ids": [],
            "matched_skills": [],
        }

    profile_total = float(profile.get("total_weight") or 0)
    hits = []
    overlap = 0.0
    for entry in profile.get("skills") or []:
        if int(entry["skill_id"]) not in requested:
            continue
        hits.append(entry)
        overlap += float(entry.get("weight") or 0)

    if profile_total > 0:
        percent = overlap / profile_total * 100.0
    else:
        percent = 0.0

    return {
        "match_weight": _round2(overlap),
        "match_percent": _round2(percent),
        "matched_skill_ids": [int(hit["skill_id"]) for hit in hits],
        "matched_skills": hits,
    }