# shinkan-jinkendo/backend/planning_exercise_target_pipeline.py

"""
Szenario-Routing und Erwartungsprofil-Pipeline für Planungs-Übungssuche (P1).

Ablauf:
  1. Heuristik: Intent + Szenario-Klasse aus Query/Kontext
  2. Optional LLM (planning_exercise_search_intent) bei komplexen Anfragen
  3. Deterministisches Basis-Profil (Rahmen, Plan, Anker)
  4. Query-Overlay mergen → PlanningTargetProfile für Vorselektion
"""
from __future__ import annotations

import re
from typing import Any, Dict, List, Mapping, Optional, Tuple

from planning_exercise_intent import (
    PlanningQueryIntentParsed,
    resolve_query_intent_catalog_ids,
    try_parse_planning_query_intent,
)
from planning_exercise_profiles import (
    PlanningTargetProfile,
    _merge_weight_maps,
    _normalize_weight_map,
    build_planning_target_profile,
)

# Scenario kinds for a planning exercise search. classify_planning_scenario
# returns exactly one of these string values.
# Empty query, or a query matching one of _SIMPLE_PRESET_PATTERNS.
SCENARIO_PRESET_NEXT = "preset_next"
# Chosen for the heuristic intent "progression_next".
SCENARIO_PROGRESSION = "progression"
# Chosen for the heuristic intent "deepen_exercise".
SCENARIO_DEEPEN = "deepen"
# Chosen for the heuristic intents "continue_plan_goal" and "suggest_next".
SCENARIO_CONTINUE_PLAN = "continue_plan"
# Chosen when the query contains one of _ADDITIVE_MARKERS.
SCENARIO_ADDITIVE = "additive_constraint"
# Chosen for the heuristic intent "free_search" and as the final fallback.
SCENARIO_FREE_SEARCH = "free_search"

# Regular expressions (German phrasing) that is_simple_preset_query matches
# against the whitespace-normalized, lower-cased query. Every pattern is
# anchored with ^...$, so the whole query has to be a generic "suggest the
# next exercise" request for it to count as a simple preset query.
# Both umlaut spellings and their ASCII transliterations (ae/ue) are accepted.
# NOTE(review): the last two patterns look like they are already covered by
# the second one (its trailing groups are optional) — confirm before removing.
_SIMPLE_PRESET_PATTERNS = (
    r"^(schlage?\s+(mir\s+)?(die\s+)?(n[aä]chste|naechste)\s+(sinnvolle\s+)?(übung|uebung)\s*(vor)?\.?)$",
    r"^(n[aä]chste|naechste)\s+(übung|uebung)\s*(vorschlag|vorschlagen|empfehl\w*)?\.?$",
    r"^(vorschlag|vorschlagen|empfehl\w*)\s*(für|fuer)?\s*(die\s+)?(n[aä]chste|naechste)?\s*(übung|uebung)?\.?$",
    r"^n[aä]chste\s+übung$",
    r"^n[aä]chste\s+uebung$",
)

# Substrings (German) that mark a query as an additional constraint on top of
# the current plan. classify_planning_scenario tests them with ``in`` against
# the lower-cased query padded with one space on each side, so a marker that
# carries a leading/trailing space only matches at a word boundary — including
# at the very start or end of the query.
_ADDITIVE_MARKERS = (
    "zusätzlich",
    "zusaetzlich",
    # The leading space is required: a bare "auch " would also fire inside
    # unrelated words such as "bauch " or "gebrauch ".
    " auch ",
    " außerdem",
    " ausserdem",
    " dazu",
    " extra",
    " mehr ",
    " und dabei",
    " sowie ",
)


def _normalize_query(q: Optional[str]) -> str:
    """Return *q* trimmed, with each run of whitespace collapsed to one space.

    A falsy value (``None`` or an empty string) yields ``""``.
    """
    text = "" if not q else q
    trimmed = text.strip()
    return re.sub(r"\s+", " ", trimmed)


def is_simple_preset_query(query: Optional[str]) -> bool:
    """Tell whether *query* is empty or a generic "next exercise" request.

    The query is whitespace-normalized and lower-cased, then matched against
    each entry of ``_SIMPLE_PRESET_PATTERNS``.

    Returns:
        ``True`` for an empty query or when any pattern matches, else ``False``.
    """
    normalized = _normalize_query(query).lower()
    if normalized == "":
        # Nothing typed at all counts as the plain preset request.
        return True
    return any(
        re.match(pattern, normalized, flags=re.IGNORECASE) is not None
        for pattern in _SIMPLE_PRESET_PATTERNS
    )


def classify_planning_scenario(
    query: Optional[str],
    heuristic_intent: str,
) -> str:
    """Map a query and its heuristic intent to one of the ``SCENARIO_*`` kinds.

    Precedence, first hit wins:
      1. empty or simple preset query -> ``SCENARIO_PRESET_NEXT``
      2. intent ``"progression_next"`` / ``"deepen_exercise"``
      3. query contains an additive marker -> ``SCENARIO_ADDITIVE``
      4. remaining intents; anything unknown -> ``SCENARIO_FREE_SEARCH``

    Args:
        query: Raw user query; may be ``None``.
        heuristic_intent: Intent label produced by the heuristic stage.

    Returns:
        The scenario kind as a string.
    """
    normalized = _normalize_query(query).lower()
    if not normalized or is_simple_preset_query(normalized):
        return SCENARIO_PRESET_NEXT

    # These intents take priority over additive markers in the query text.
    priority_scenarios = {
        "progression_next": SCENARIO_PROGRESSION,
        "deepen_exercise": SCENARIO_DEEPEN,
    }
    early = priority_scenarios.get(heuristic_intent)
    if early is not None:
        return early

    # Pad with spaces so markers with surrounding spaces also hit at the
    # start and end of the query.
    padded = f" {normalized} "
    for marker in _ADDITIVE_MARKERS:
        if marker in padded:
            return SCENARIO_ADDITIVE

    remaining_scenarios = {
        "continue_plan_goal": SCENARIO_CONTINUE_PLAN,
        "free_search": SCENARIO_FREE_SEARCH,
        "suggest_next": SCENARIO_CONTINUE_PLAN,
    }
    return remaining_scenarios.get(heuristic_intent, SCENARIO_FREE_SEARCH)


def should_run_llm_intent_pipeline(
    query: Optional[str],
    scenario: str,
    *,
    include_llm_intent: bool,
) -> bool:
    """Decide whether the LLM intent stage should run for this request.

    Args:
        query: Raw user query; may be ``None``.
        scenario: Scenario kind already chosen for the request.
        include_llm_intent: Caller-level switch; a falsy value always disables
            the stage.

    Returns:
        ``True`` only when the switch is on, the scenario is not
        ``SCENARIO_PRESET_NEXT`` and the normalized query is non-empty.
    """
    llm_allowed = include_llm_intent and scenario != SCENARIO_PRESET_NEXT
    if not llm_allowed:
        return False
    return _normalize_query(query) != ""


def _recalculate_skill_gap(target: PlanningTargetProfile) -> PlanningTargetProfile:
    """Return a copy of *target* with ``skill_gap_weights`` recomputed.

    Target and plan skill weights are each passed through
    ``_normalize_weight_map``. For every target skill the gap is
    ``target_weight - 0.85 * plan_weight``; only gaps above ``0.08`` are kept.
    The ``"skill_gap_vs_plan"`` source tag is added when at least one gap
    remains and removed when none does.

    Returns:
        A new ``PlanningTargetProfile``. ``skill_weights`` holds the normalized
        target weights; ``skill_plan_weights`` and the id maps are passed
        through from *target* unchanged.
    """
    wanted = _normalize_weight_map(dict(target.skill_weights))
    planned = _normalize_weight_map(dict(target.skill_plan_weights))

    shortfalls = (
        (skill_id, weight - planned.get(skill_id, 0.0) * 0.85)
        for skill_id, weight in wanted.items()
    )
    gaps: Dict[int, float] = {
        skill_id: shortfall for skill_id, shortfall in shortfalls if shortfall > 0.08
    }

    gap_tag = "skill_gap_vs_plan"
    if gaps:
        provenance = list(target.sources)
        if gap_tag not in provenance:
            provenance.append(gap_tag)
    else:
        # No gap left: make sure a stale tag does not survive.
        provenance = [tag for tag in target.sources if tag != gap_tag]

    return PlanningTargetProfile(
        focus_area_ids=target.focus_area_ids,
        style_direction_ids=target.style_direction_ids,
        training_type_ids=target.training_type_ids,
        target_group_ids=target.target_group_ids,
        skill_weights=wanted,
        skill_gap_weights=_normalize_weight_map(gaps) if gaps else {},
        skill_plan_weights=target.skill_plan_weights,
        sources=provenance,
    )


def merge_query_overlay_into_target(
    base: PlanningTargetProfile,
    *,
    focus: Dict[int, float],
    style: Dict[int, float],
    tt: Dict[int, float],
    tg: Dict[int, float],
    skills: Dict[int, float],
    emphasis: str = "additive",
    scenario: str,
) -> PlanningTargetProfile:
    """Merge query-derived weight maps into *base* and refresh the skill gap.

    Two merge modes exist. The "replace / free search" mode is used when
    ``emphasis == "replace"`` or ``scenario == SCENARIO_FREE_SEARCH``; every
    other combination uses the additive mode. The modes differ only in the
    ``scale`` values handed to ``_merge_weight_maps`` and in how the skill
    weights are derived.

    Args:
        base: Profile built without the query overlay.
        focus: Overlay weights for focus areas.
        style: Overlay weights for style directions.
        tt: Overlay weights for training types.
        tg: Overlay weights for target groups.
        skills: Overlay weights for skills.
        emphasis: ``"replace"`` selects the replace mode; other values do not.
        scenario: Scenario kind of the request.

    Returns:
        A new profile tagged with the ``"query_intent"`` source, passed through
        ``_recalculate_skill_gap``.
    """
    provenance = list(base.sources)
    if "query_intent" not in provenance:
        provenance.append("query_intent")

    replacing = emphasis == "replace"
    if replacing or scenario == SCENARIO_FREE_SEARCH:
        # NOTE(review): with emphasis "replace" and non-empty skills the first
        # two assignments are overwritten by the third; the calls are kept to
        # preserve the exact call sequence.
        merged_skills = _merge_weight_maps({}, skills, scale=1.0)
        if skills:
            merged_skills = _normalize_weight_map(
                _merge_weight_maps(base.skill_weights, skills, scale=0.55)
            )
            if replacing:
                merged_skills = _normalize_weight_map(skills)
        focus_scale = 0.5 if replacing else 0.85
        facet_scale = 0.5
    else:
        skill_scale = 1.0 if scenario == SCENARIO_ADDITIVE else 0.85
        merged_skills = _merge_weight_maps(base.skill_weights, skills, scale=skill_scale)
        focus_scale = 0.9
        facet_scale = 0.75

    merged_focus = _merge_weight_maps(base.focus_area_ids, focus, scale=focus_scale)
    merged_style = _merge_weight_maps(base.style_direction_ids, style, scale=facet_scale)
    merged_tt = _merge_weight_maps(base.training_type_ids, tt, scale=facet_scale)
    merged_tg = _merge_weight_maps(base.target_group_ids, tg, scale=facet_scale)

    def _normalized_or_same(weights):
        # Empty (falsy) maps are handed on untouched instead of normalized.
        return _normalize_weight_map(weights) if weights else weights

    merged_profile = PlanningTargetProfile(
        focus_area_ids=_normalized_or_same(merged_focus),
        style_direction_ids=_normalized_or_same(merged_style),
        training_type_ids=_normalized_or_same(merged_tt),
        target_group_ids=_normalized_or_same(merged_tg),
        skill_weights=_normalized_or_same(merged_skills),
        skill_gap_weights=dict(base.skill_gap_weights),
        skill_plan_weights=dict(base.skill_plan_weights),
        sources=provenance,
    )
    return _recalculate_skill_gap(merged_profile)


def build_planning_target_with_query_pipeline(
    cur,
    *,
    unit: Dict[str, Any],
    planned_exercise_ids: List[int],
    anchor_exercise_id: Optional[int],
    query: Optional[str],
    heuristic_intent: str,
    include_llm_intent: bool,
    context_summary: Mapping[str, Any],
) -> Tuple[PlanningTargetProfile, str, str, Dict[str, Any]]:
    """Build the target profile for a planning search, optionally LLM-refined.

    Steps:
      1. Classify the scenario from the query and heuristic intent.
      2. Build the base profile and its summary.
      3. If the LLM stage is enabled for this request, parse the query intent.
      4. When the LLM result was applied, adopt its intent/scenario if they are
         known values and merge any resolved catalog weights into the profile.

    Args:
        cur: Cursor-like handle forwarded to the profile, summary, LLM-parse
            and catalog-resolution helpers.
        unit: Unit data forwarded to ``build_planning_target_profile``.
        planned_exercise_ids: Exercise ids already in the plan.
        anchor_exercise_id: Optional anchor exercise id.
        query: Raw user query; may be ``None``.
        heuristic_intent: Intent label from the heuristic stage.
        include_llm_intent: Switch for the LLM intent stage.
        context_summary: Context forwarded to the LLM intent parser.

    Returns:
        ``(target_profile, resolved_intent, scenario_kind, query_intent_summary)``.
    """
    scenario_kind = classify_planning_scenario(query, heuristic_intent)

    base_profile = build_planning_target_profile(
        cur,
        unit=unit,
        planned_exercise_ids=planned_exercise_ids,
        anchor_exercise_id=anchor_exercise_id,
        intent=heuristic_intent,
    )
    # NOTE(review): the summary is only consumed by the LLM stage below but is
    # built unconditionally — confirm whether it can be made lazy.
    base_summary = base_profile.to_summary_dict(cur)

    llm_result: Optional[PlanningQueryIntentParsed] = None
    llm_used = False
    if should_run_llm_intent_pipeline(
        query, scenario_kind, include_llm_intent=include_llm_intent
    ):
        llm_result, llm_used = try_parse_planning_query_intent(
            cur,
            query=_normalize_query(query),
            heuristic_intent=heuristic_intent,
            scenario_hint=scenario_kind,
            context_summary=context_summary,
            target_profile_summary=base_summary,
        )

    final_intent = heuristic_intent
    final_target = base_profile
    skill_hints: List[Dict[str, Any]] = []

    if llm_result and llm_used:
        known_intents = frozenset(
            (
                "suggest_next",
                "progression_next",
                "deepen_exercise",
                "continue_plan_goal",
                "free_search",
            )
        )
        # Only accept LLM output that names a known intent / scenario.
        if llm_result.intent in known_intents:
            final_intent = llm_result.intent
        if llm_result.scenario in VALID_SCENARIOS_SET:
            scenario_kind = llm_result.scenario

        focus, style, tt, tg, skills, skill_hints = resolve_query_intent_catalog_ids(
            cur, llm_result
        )
        has_overlay = bool(focus or style or tt or tg or skills)
        if has_overlay:
            final_target = merge_query_overlay_into_target(
                base_profile,
                focus=focus,
                style=style,
                tt=tt,
                tg=tg,
                skills=skills,
                emphasis=llm_result.emphasis,
                scenario=scenario_kind,
            )

    # Parsed details are reported whenever a parse result exists.
    if llm_result:
        emphasis = llm_result.emphasis
        rationale = llm_result.rationale
        needs_partner = llm_result.requires_partner
    else:
        emphasis = rationale = needs_partner = None

    query_intent_summary: Dict[str, Any] = {
        "scenario": scenario_kind,
        "intent": final_intent,
        "heuristic_intent": heuristic_intent,
        "llm_applied": llm_used,
        "emphasis": emphasis,
        "rationale": rationale,
        "skill_hints_resolved": skill_hints,
        "requires_partner": needs_partner,
    }

    return final_target, final_intent, scenario_kind, query_intent_summary


# All known scenario kinds. build_planning_target_with_query_pipeline only
# adopts a scenario proposed by the LLM stage when it is a member of this set.
# It is defined after that function, which works because the name is looked up
# when the function is called, not when it is defined.
VALID_SCENARIOS_SET = {
    SCENARIO_PRESET_NEXT,
    SCENARIO_PROGRESSION,
    SCENARIO_DEEPEN,
    SCENARIO_CONTINUE_PLAN,
    SCENARIO_ADDITIVE,
    SCENARIO_FREE_SEARCH,
}


def compose_retrieval_phase(*, query_intent: bool, llm_rank: bool) -> str:
    """Build the retrieval-phase label from the stages that were active.

    The label always starts with ``"profile_v1"``; ``"query_intent"`` and
    ``"llm_rank"`` are appended in that order when their flag is truthy, all
    joined with ``"+"``.
    """
    optional_stages = (
        ("query_intent", query_intent),
        ("llm_rank", llm_rank),
    )
    active = [label for label, enabled in optional_stages if enabled]
    return "+".join(["profile_v1", *active])


# Names exported by ``from <module> import *``.
# NOTE(review): only two of the six SCENARIO_* constants are listed, and
# VALID_SCENARIOS_SET is not — confirm whether the others are meant to stay
# out of the star-import surface.
__all__ = [
    "SCENARIO_ADDITIVE",
    "SCENARIO_PRESET_NEXT",
    "build_planning_target_with_query_pipeline",
    "classify_planning_scenario",
    "compose_retrieval_phase",
    "is_simple_preset_query",
    "merge_query_overlay_into_target",
    "should_run_llm_intent_pipeline",
]