# shinkan-jinkendo/backend/skill_scoring.py

"""
Gewichtetes Fähigkeiten-Scoring aus Übungsvorkommen (Phase 3, regelbasiert).

Aggregiert exercise_skills über alle Übungen eines Artefakts mit Gewichten aus:
geplanter Dauer, Vorkommen, Intensität (Nutzeneinschätzung) und Stufen-Spanne (von/bis).

is_primary wird bewusst nicht genutzt (perspektivabhängig). development_contribution ist
in der UI nicht gepflegt und wird ignoriert.
"""
from __future__ import annotations

from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple


# Fallback duration (minutes) for an item whose planned duration is missing or unusable.
DEFAULT_ITEM_MINUTES = 8
# Fallback duration (minutes) used when scoring progression-graph occurrences,
# which are collected without a planned duration.
GRAPH_DEFAULT_ITEM_MINUTES = 10

# Weight factor per intensity label; German and English spellings share one factor.
_INTENSITY_MULT = dict(
    niedrig=0.85,
    low=0.85,
    mittel=1.0,
    medium=1.0,
    hoch=1.2,
    high=1.2,
)

# Keep in sync with backend/routers/exercises.py _EXERCISE_SKILL_LEVEL_RANK_SQL / skillLevels.js
_LEVEL_RANK = {
    "basis": 1,
    "grundlagen": 2,
    "aufbau": 3,
    "fortgeschritten": 4,
    "optimierung": 5,
    # Alias labels for the lowest and the highest rank.
    "einsteiger": 1,
    "experte": 5,
    # Numeric strings "1".."5" map to their own value.
    **{str(rank): rank for rank in range(1, 6)},
}


def _level_rank(value: Optional[str]) -> Optional[int]:
    """Translate a skill-level label into its numeric rank.

    The label is stringified, stripped and lower-cased before it is looked up
    in ``_LEVEL_RANK``.

    Returns:
        The rank, or ``None`` when ``value`` is ``None``, blank or unknown.
    """
    normalized = "" if value is None else str(value).strip().lower()
    # An empty key short-circuits so the table is only consulted for real labels.
    return _LEVEL_RANK.get(normalized) if normalized else None


def _level_range_multiplier(
    required_level: Optional[str] = None,
    target_level: Optional[str] = None,
) -> float:
    """
    Weight factor derived from the level range (from/to) of a skill link.

    Wider ranges and higher levels yield a slightly higher factor. When neither
    level resolves to a rank the factor is neutral (1.0). When only one level
    resolves, it is used for both ends of the range.
    """
    low = _level_rank(required_level)
    high = _level_rank(target_level)
    if low is None:
        if high is None:
            return 1.0
        low = high
    elif high is None:
        high = low
    # Tolerate reversed input: the range is always evaluated low -> high.
    if low > high:
        low, high = high, low
    # Number of levels covered, clamped to 1..5.
    span = max(1, min(5, high - low + 1))
    midpoint = (low + high) / 2.0
    return (0.92 + 0.04 * span) * (0.95 + 0.025 * midpoint)


@dataclass(frozen=True)
class ExerciseOccurrence:
    """A single, immutable occurrence of an exercise inside a scored artifact."""

    # Identifier of the exercise that occurs.
    exercise_id: int
    # Planned duration in minutes; None when the occurrence carries no duration.
    planned_duration_min: Optional[int] = None
    # Optional label for UI (e.g. slot title).
    context_label: Optional[str] = None


def _item_base_minutes(planned: Optional[int], default: int = DEFAULT_ITEM_MINUTES) -> float:
    """Return the duration in minutes to use for one item.

    A ``planned`` value that converts to a positive integer is used as is.
    Anything else (``None``, not convertible, zero or negative) falls back to
    ``default``.
    """
    if planned is None:
        return float(default)
    try:
        minutes = int(planned)
    except (TypeError, ValueError):
        return float(default)
    return float(minutes) if minutes > 0 else float(default)


def _skill_link_multiplier(
    *,
    intensity: Optional[str] = None,
    required_level: Optional[str] = None,
    target_level: Optional[str] = None,
) -> float:
    """Combine the intensity factor and the level-range factor of one skill link.

    A missing or unknown intensity label counts as neutral (1.0).
    """
    intensity_factor = 1.0
    if intensity:
        label = str(intensity).strip().lower()
        intensity_factor = _INTENSITY_MULT.get(label, 1.0)
    return intensity_factor * _level_range_multiplier(required_level, target_level)


def _round2(val: float) -> float:
    """Round ``val`` to two decimal places with the built-in ``round``."""
    rounded = round(val, 2)
    return rounded


def _build_by_main_category(skills_out: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Group skills into a main category -> category hierarchy.

    Weights are summed per main category and per category. Each category
    reports its skill count and its top skill by absolute weight. Main
    categories and categories are ordered by descending weight, then by name.
    Missing ids are grouped under key 0 and reported as ``None``; missing names
    are reported as "—". ``top_skill`` is the very dict taken from
    ``skills_out``, not a copy.
    """
    buckets: Dict[int, Dict[str, Any]] = {}

    for skill in skills_out:
        main_id = int(skill.get("main_category_id") or 0)
        main_name = (skill.get("main_category_name") or "").strip() or "—"
        sub_id = int(skill.get("category_id") or 0)
        sub_name = (skill.get("category_name") or skill.get("category") or "").strip() or "—"
        weight = float(skill.get("weight") or 0)

        # The first skill seen for a bucket determines the bucket's name.
        main_bucket = buckets.setdefault(
            main_id,
            {
                "main_category_id": main_id or None,
                "main_category_name": main_name,
                "weight": 0.0,
                "categories": {},
            },
        )
        main_bucket["weight"] += weight

        sub_bucket = main_bucket["categories"].setdefault(
            sub_id,
            {
                "category_id": sub_id or None,
                "category_name": sub_name,
                "weight": 0.0,
                "skills": [],
            },
        )
        sub_bucket["weight"] += weight
        sub_bucket["skills"].append(skill)

    def main_order(entry: Dict[str, Any]):
        return (-entry["weight"], entry.get("main_category_name") or "")

    def category_order(entry: Dict[str, Any]):
        return (-entry["weight"], entry.get("category_name") or "")

    def skill_order(entry: Dict[str, Any]):
        return (-float(entry.get("weight") or 0), entry.get("skill_name") or "")

    hierarchy: List[Dict[str, Any]] = []
    for main_bucket in sorted(buckets.values(), key=main_order):
        categories: List[Dict[str, Any]] = []
        for sub_bucket in sorted(main_bucket["categories"].values(), key=category_order):
            ranked = sorted(sub_bucket["skills"], key=skill_order)
            categories.append(
                {
                    "category_id": sub_bucket["category_id"],
                    "category_name": sub_bucket["category_name"],
                    "weight": _round2(sub_bucket["weight"]),
                    "skills_count": len(ranked),
                    "top_skill": ranked[0] if ranked else None,
                }
            )
        hierarchy.append(
            {
                "main_category_id": main_bucket["main_category_id"],
                "main_category_name": main_bucket["main_category_name"],
                "weight": _round2(main_bucket["weight"]),
                "categories": categories,
            }
        )
    return hierarchy


def _apply_reference_universal_percent(
    skills_out: List[Dict[str, Any]],
    reference_max_by_skill: Optional[Dict[int, float]] = None,
) -> None:
    """
    Set ``universal_percent`` on every skill entry in place.

    With a reference map, the value is the skill weight as a percentage of the
    reference maximum for that skill (same scale across artifacts). It is
    ``None`` when no reference map is given or the skill has no positive
    reference value.
    """
    scale = reference_max_by_skill or {}
    for skill in skills_out:
        percent = None
        if scale:
            reference = float(scale.get(int(skill["skill_id"])) or 0)
            weight = float(skill.get("weight") or 0)
            if reference > 0:
                percent = _round2(weight / reference * 100.0)
        skill["universal_percent"] = percent


def compute_skill_profile(
    occurrences: Sequence[ExerciseOccurrence],
    skill_rows_by_exercise: Dict[int, List[Dict[str, Any]]],
    *,
    default_item_minutes: int = DEFAULT_ITEM_MINUTES,
    reference_max_by_skill: Optional[Dict[int, float]] = None,
) -> Dict[str, Any]:
    """
    Build a skill profile from exercise occurrences and exercise_skills links.

    Every exercise contributes ``summed minutes of its occurrences * link
    multiplier`` to each skill it is linked to. Skill weights are the sums of
    these contributions; shares are percentages of the total weight.

    Args:
        occurrences: Exercise occurrences of the artifact (``None`` counts as empty).
        skill_rows_by_exercise: Link rows per exercise id. Keys read per row:
            skill_id, skill_name, category, category_id, category_name,
            main_category_id, main_category_name, focus_areas, intensity,
            required_level, target_level, exercise_title.
        default_item_minutes: Minutes used for occurrences without a usable
            planned duration.
        reference_max_by_skill: Optional maximum weight per skill id; enables
            ``universal_percent`` on each skill.

    Returns:
        A dict with metadata (timestamp, scoring version, unit), totals,
        occurrence/exercise counts, the sorted ``skills`` list (each with up to
        eight ``top_exercises``) and the ``by_main_category`` hierarchy.
    """
    # Pass 1: per exercise, count occurrences and add up their minutes.
    exercise_meta: Dict[int, Dict[str, Any]] = {}
    total_occurrences = 0
    for occ in occurrences or []:
        eid = int(occ.exercise_id)
        mins = _item_base_minutes(occ.planned_duration_min, default_item_minutes)
        meta = exercise_meta.setdefault(eid, {"occurrence_count": 0, "minutes": 0.0})
        meta["occurrence_count"] += 1
        meta["minutes"] += mins
        total_occurrences += 1

    # Pass 2: distribute each exercise's minutes onto its linked skills.
    skill_acc: Dict[int, Dict[str, Any]] = {}
    total_weight = 0.0
    exercises_with_skills: set[int] = set()

    for eid, meta in exercise_meta.items():
        links = skill_rows_by_exercise.get(eid) or []
        if not links:
            continue
        exercises_with_skills.add(eid)
        occ_count = meta["occurrence_count"]
        # Summed minutes over all occurrences; used directly instead of the
        # average-times-count round trip, which only added float noise.
        exercise_minutes = meta["minutes"]

        for link in links:
            sid = link.get("skill_id")
            if sid is None:
                continue
            sid = int(sid)
            link_mult = _skill_link_multiplier(
                intensity=link.get("intensity"),
                required_level=link.get("required_level"),
                target_level=link.get("target_level"),
            )
            contribution = exercise_minutes * link_mult
            if contribution <= 0:
                continue

            acc = skill_acc.get(sid)
            if acc is None:
                category_label = link.get("category_name") or link.get("category")
                acc = skill_acc[sid] = {
                    "skill_id": sid,
                    "skill_name": link.get("skill_name") or f"Fähigkeit #{sid}",
                    "category": category_label,
                    "category_id": link.get("category_id"),
                    "category_name": category_label,
                    "main_category_id": link.get("main_category_id"),
                    "main_category_name": link.get("main_category_name"),
                    "focus_areas": link.get("focus_areas"),
                    "weight": 0.0,
                    "occurrence_count": 0,
                    "exercises": {},
                }
            acc["weight"] += contribution
            acc["occurrence_count"] += occ_count
            ex_key = str(eid)
            if ex_key not in acc["exercises"]:
                acc["exercises"][ex_key] = {
                    "exercise_id": eid,
                    "title": link.get("exercise_title") or f"Übung #{eid}",
                    "weight": 0.0,
                    "occurrence_count": occ_count,
                }
            acc["exercises"][ex_key]["weight"] += contribution
            total_weight += contribution

    # Pass 3: shape the per-skill output entries.
    skills_out: List[Dict[str, Any]] = []
    for sid, acc in skill_acc.items():
        share = (acc["weight"] / total_weight * 100.0) if total_weight > 0 else 0.0
        ex_list = sorted(
            acc["exercises"].values(),
            key=lambda x: (-x["weight"], x.get("title") or ""),
        )[:8]
        for ex in ex_list:
            # Derive the share from the unrounded weight (as done for the skill
            # share) so rounding is applied only once.
            raw_weight = ex["weight"]
            ex["weight"] = _round2(raw_weight)
            if total_weight > 0:
                ex["share_percent"] = _round2(raw_weight / total_weight * 100.0)
            else:
                ex["share_percent"] = 0.0
        skills_out.append(
            {
                "skill_id": sid,
                "skill_name": acc["skill_name"],
                "category": acc["category_name"],
                "category_id": acc.get("category_id"),
                "category_name": acc["category_name"],
                "main_category_id": acc.get("main_category_id"),
                "main_category_name": acc.get("main_category_name"),
                "focus_areas": acc.get("focus_areas"),
                "weight": _round2(acc["weight"]),
                "score": _round2(acc["weight"]),
                "artifact_share_percent": _round2(share),
                "share_percent": _round2(share),
                "occurrence_count": acc["occurrence_count"],
                "top_exercises": ex_list,
            }
        )
    skills_out.sort(key=lambda x: (-x["weight"], x.get("skill_name") or ""))

    # universal_percent is set on every skill entry here; the category
    # hierarchy is built afterwards from these same entries, so its top_skill
    # values already carry it and need no second computation.
    _apply_reference_universal_percent(skills_out, reference_max_by_skill)
    by_main_category = _build_by_main_category(skills_out)

    return {
        "computed_at": datetime.now(timezone.utc).isoformat(),
        "scoring_version": "1.2",
        "score_unit": "weighted_minutes",
        "score_unit_label": "Trainingsgewicht (gewichtete Minuten, über Programme vergleichbar)",
        "total_weight": _round2(total_weight),
        "total_score": _round2(total_weight),
        "exercise_occurrence_count": total_occurrences,
        "distinct_exercise_count": len(exercise_meta),
        "exercises_with_skills_count": len(exercises_with_skills),
        "skills": skills_out,
        "by_main_category": by_main_category,
        "has_reference_scale": bool(reference_max_by_skill),
    }


def fetch_exercise_skills_bulk(
    cur, exercise_ids: Iterable[int]
) -> Dict[int, List[Dict[str, Any]]]:
    """Load the skill links of many exercises with a single query.

    Args:
        cur: Cursor offering ``execute`` and ``fetchall``; each row must be
            convertible with ``dict(row)``.
        exercise_ids: Exercise ids; falsy entries are dropped and duplicates
            collapsed.

    Returns:
        Link rows grouped by exercise id. ``focus_areas`` is always a list in
        each row: JSON text is decoded, anything that is not (or does not
        decode to) a list becomes ``[]``. Returns ``{}`` without querying when
        no ids remain.
    """
    import json

    unique_ids = sorted({int(raw_id) for raw_id in exercise_ids if raw_id})
    if not unique_ids:
        return {}
    # One "%s" placeholder per id; the ids themselves travel as query parameters.
    placeholders = ",".join("%s" for _ in unique_ids)
    cur.execute(
        f"""
        SELECT es.exercise_id, es.skill_id, es.is_primary, es.intensity,
               es.development_contribution, es.required_level, es.target_level,
               s.name AS skill_name, s.category,
               sc.id AS category_id, sc.name AS category_name,
               mc.id AS main_category_id, mc.name AS main_category_name,
               s.focus_areas,
               e.title AS exercise_title
        FROM exercise_skills es
        JOIN skills s ON s.id = es.skill_id
        LEFT JOIN skill_categories sc ON sc.id = s.category_id
        LEFT JOIN skill_main_categories mc ON mc.id = COALESCE(s.main_category_id, sc.main_category_id)
        JOIN exercises e ON e.id = es.exercise_id
        WHERE es.exercise_id IN ({placeholders})
          AND (s.status = 'active' OR s.status IS NULL)
        ORDER BY es.exercise_id, s.name, es.skill_id
        """,
        unique_ids,
    )
    grouped: Dict[int, List[Dict[str, Any]]] = {}
    for row in cur.fetchall():
        record = dict(row)
        exercise_id = int(record["exercise_id"])
        focus = record.get("focus_areas")
        if isinstance(focus, str):
            # Stored as JSON text: decode, falling back to "no focus areas".
            try:
                focus = json.loads(focus)
            except Exception:
                focus = []
        record["focus_areas"] = focus if isinstance(focus, list) else []
        grouped.setdefault(exercise_id, []).append(record)
    return grouped


def collect_unit_exercise_occurrences(cur, unit_id: int) -> List[ExerciseOccurrence]:
    """Return the exercise occurrences of one training unit.

    Reads the exercise items of all sections of the unit, ordered by section
    and item position. Rows must support item access and ``.get``.
    """
    params = (int(unit_id),)
    cur.execute(
        """
        SELECT tusi.exercise_id, tusi.planned_duration_min
        FROM training_unit_section_items tusi
        INNER JOIN training_unit_sections tus ON tus.id = tusi.section_id
        WHERE tus.training_unit_id = %s
          AND tusi.item_type = 'exercise'
          AND tusi.exercise_id IS NOT NULL
        ORDER BY tus.order_index, tusi.order_index
        """,
        params,
    )
    found: List[ExerciseOccurrence] = []
    for row in cur.fetchall():
        found.append(
            ExerciseOccurrence(
                exercise_id=int(row["exercise_id"]),
                planned_duration_min=row.get("planned_duration_min"),
            )
        )
    return found


def collect_module_exercise_occurrences(cur, module_id: int) -> List[ExerciseOccurrence]:
    """Return the exercise occurrences of one training module.

    Reads the module's exercise items in item order. Rows must support item
    access and ``.get``.
    """
    params = (int(module_id),)
    cur.execute(
        """
        SELECT exercise_id, planned_duration_min
        FROM training_module_items
        WHERE module_id = %s
          AND item_type = 'exercise'
          AND exercise_id IS NOT NULL
        ORDER BY order_index
        """,
        params,
    )
    found: List[ExerciseOccurrence] = []
    for row in cur.fetchall():
        found.append(
            ExerciseOccurrence(
                exercise_id=int(row["exercise_id"]),
                planned_duration_min=row.get("planned_duration_min"),
            )
        )
    return found


def collect_progression_graph_exercise_occurrences(cur, graph_id: int) -> List[ExerciseOccurrence]:
    """Return one occurrence per edge endpoint of a progression graph.

    Every appearance of an exercise as from- or to-endpoint of an edge counts.
    No duration is attached, so scoring falls back to its default minutes.
    """
    graph_key = int(graph_id)
    cur.execute(
        """
        SELECT from_exercise_id AS exercise_id FROM exercise_progression_edges WHERE graph_id = %s
        UNION ALL
        SELECT to_exercise_id AS exercise_id FROM exercise_progression_edges WHERE graph_id = %s
        """,
        (graph_key, graph_key),
    )
    endpoints: List[ExerciseOccurrence] = []
    for row in cur.fetchall():
        endpoints.append(
            ExerciseOccurrence(
                exercise_id=int(row["exercise_id"]),
                planned_duration_min=None,
                context_label=None,
            )
        )
    return endpoints


def profile_for_occurrences(
    cur,
    occurrences: Sequence[ExerciseOccurrence],
    *,
    default_item_minutes: int = DEFAULT_ITEM_MINUTES,
    reference_max_by_skill: Optional[Dict[int, float]] = None,
) -> Dict[str, Any]:
    """Load the skill links for the given occurrences and compute their profile.

    Fetches the links of all occurring exercises through ``cur`` and passes
    them, together with the keyword options, to ``compute_skill_profile``.
    """
    exercise_ids = [occurrence.exercise_id for occurrence in occurrences]
    links_by_exercise = fetch_exercise_skills_bulk(cur, exercise_ids)
    profile = compute_skill_profile(
        occurrences,
        links_by_exercise,
        default_item_minutes=default_item_minutes,
        reference_max_by_skill=reference_max_by_skill,
    )
    return profile


def merge_skill_weights_into_max(
    target: Dict[int, float],
    profile: Dict[str, Any],
) -> None:
    """Raise the per-skill maxima in ``target`` using one profile (in place).

    For every skill of ``profile`` the stored value is replaced when the
    profile's weight is strictly larger; skills not yet in ``target`` are
    compared against 0.0, so non-positive weights never create an entry.
    """
    for skill in profile.get("skills") or []:
        skill_id = int(skill["skill_id"])
        weight = float(skill.get("weight") or 0)
        known_max = target.get(skill_id, 0.0)
        if weight > known_max:
            target[skill_id] = weight


def compute_corpus_skill_max_weights(
    cur,
    *,
    profile_id: int,
    role: Optional[str],
    effective_club_id: Optional[int],
    limit_per_type: int = 50,
) -> Dict[int, float]:
    """
    Maximum absolute training weight per skill across visible library artifacts.
    Basis for universal_percent (one scale across all programs).

    Scans framework programs, modules and progression graphs in that order.
    For each artifact a skill profile is computed and merged into a running
    per-skill maximum.

    Args:
        cur: Cursor used for all queries; rows are accessed by column name.
        profile_id, role, effective_club_id: Passed through to
            ``library_content_visibility_sql`` to build the WHERE clause
            (presumably restricting rows to what this profile may see; confirm
            in tenant_context).
        limit_per_type: Maximum number of artifacts scanned per artifact type,
            most recently updated first.

    Returns:
        Mapping of skill id to the largest weight found in any scanned artifact.
    """
    from tenant_context import library_content_visibility_sql

    max_by_skill: Dict[int, float] = {}

    def scan_frameworks():
        """Profile each framework program over all training units of its slots."""
        vis_clause, vis_params = library_content_visibility_sql(
            alias="fp",
            profile_id=profile_id,
            role=role,
            effective_club_id=effective_club_id,
        )
        cur.execute(
            f"""
            SELECT fp.id FROM training_framework_programs fp
            WHERE ({vis_clause})
            ORDER BY fp.updated_at DESC NULLS LAST
            LIMIT %s
            """,
            (*vis_params, limit_per_type),
        )
        # fetchall() materialises the program ids before the same cursor is
        # reused for the nested queries below.
        for row in cur.fetchall():
            fid = int(row["id"])
            cur.execute(
                """
                SELECT tu.id
                FROM training_framework_slots s
                INNER JOIN training_units tu ON tu.framework_slot_id = s.id
                WHERE s.framework_program_id = %s
                """,
                (fid,),
            )
            # All units of the program are pooled into one occurrence list, so
            # the program is scored as a single artifact.
            occ: List[ExerciseOccurrence] = []
            for u in cur.fetchall():
                occ.extend(collect_unit_exercise_occurrences(cur, int(u["id"])))
            if not occ:
                continue
            prof = profile_for_occurrences(cur, occ)
            merge_skill_weights_into_max(max_by_skill, prof)

    def scan_modules():
        """Profile each training module on its own."""
        vis_clause, vis_params = library_content_visibility_sql(
            alias="m",
            profile_id=profile_id,
            role=role,
            effective_club_id=effective_club_id,
        )
        cur.execute(
            f"""
            SELECT m.id FROM training_modules m
            WHERE ({vis_clause})
            ORDER BY m.updated_at DESC NULLS LAST
            LIMIT %s
            """,
            (*vis_params, limit_per_type),
        )
        # Module ids are fully fetched first; the cursor is reused per module.
        for row in cur.fetchall():
            mid = int(row["id"])
            occ = collect_module_exercise_occurrences(cur, mid)
            if not occ:
                continue
            prof = profile_for_occurrences(cur, occ)
            merge_skill_weights_into_max(max_by_skill, prof)

    def scan_graphs():
        """Profile each progression graph using the graph default minutes."""
        vis_clause, vis_params = library_content_visibility_sql(
            alias="g",
            profile_id=profile_id,
            role=role,
            effective_club_id=effective_club_id,
        )
        cur.execute(
            f"""
            SELECT g.id FROM exercise_progression_graphs g
            WHERE ({vis_clause})
            ORDER BY g.updated_at DESC NULLS LAST
            LIMIT %s
            """,
            (*vis_params, limit_per_type),
        )
        # Graph ids are fully fetched first; the cursor is reused per graph.
        for row in cur.fetchall():
            gid = int(row["id"])
            occ = collect_progression_graph_exercise_occurrences(cur, gid)
            if not occ:
                continue
            # Graph occurrences carry no duration, so the graph-specific
            # default minutes apply to every occurrence.
            prof = profile_for_occurrences(
                cur, occ, default_item_minutes=GRAPH_DEFAULT_ITEM_MINUTES
            )
            merge_skill_weights_into_max(max_by_skill, prof)

    scan_frameworks()
    scan_modules()
    scan_graphs()
    return max_by_skill


def match_score_for_skill_ids(profile: Dict[str, Any], skill_ids: Sequence[int]) -> Dict[str, Any]:
    """Overlap of a profile with a set of wanted skills (used for suggestions).

    Args:
        profile: Skill profile; ``skills`` (entries with ``skill_id`` and
            ``weight``) and ``total_weight`` are read.
        skill_ids: Wanted skill ids. ``None`` entries are ignored; ``None``
            as a whole is treated like an empty sequence.

    Returns:
        A dict with the summed weight of the matched skills (``match_weight``,
        ``match_score``), that weight as a percentage of the profile's total
        weight (``match_percent``, ``artifact_focus_percent``), and the matched
        skills and their ids in profile order. All values are zero/empty when
        no skill is wanted.
    """
    # A missing id list means "nothing requested" rather than a TypeError.
    wanted = set() if skill_ids is None else {int(x) for x in skill_ids if x is not None}
    if not wanted:
        return {
            "match_weight": 0.0,
            "match_score": 0.0,
            "match_percent": 0.0,
            "artifact_focus_percent": 0.0,
            "matched_skill_ids": [],
            "matched_skills": [],
        }
    matched = [sk for sk in profile.get("skills") or [] if int(sk["skill_id"]) in wanted]
    match_weight = 0.0
    for sk in matched:
        match_weight += float(sk.get("weight") or 0)
    total = float(profile.get("total_weight") or 0)
    artifact_focus = (match_weight / total * 100.0) if total > 0 else 0.0
    return {
        "match_weight": _round2(match_weight),
        "match_score": _round2(match_weight),
        "match_percent": _round2(artifact_focus),
        "artifact_focus_percent": _round2(artifact_focus),
        "matched_skill_ids": [int(m["skill_id"]) for m in matched],
        "matched_skills": matched,
    }