# Patch header that introduced this module (kept from the collapsed diff):
#   diff --git a/backend/planning_exercise_retrieval.py b/backend/planning_exercise_retrieval.py
#   new file mode 100644
#   index 0000000..217c51a
#   --- /dev/null
#   +++ b/backend/planning_exercise_retrieval.py
#   @@ -0,0 +1,436 @@
"""
Multi-stage retrieval for the planning exercise search (S1b).

Stages:
  S1b-0  candidate pool (profile signals, full text, progression successors)
  S1b-1  profile preselection -> top K before the expensive hybrid score
  S1b-2  hybrid score (full text, graph, skills, plan, profile, repetition)
"""
from __future__ import annotations

from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple

from planning_exercise_profiles import (
    PlanningTargetProfile,
    load_exercise_match_profiles_bulk,
    score_exercise_against_target,
)

_RAW_POOL_LIMIT = 500
_PROFILE_PRESELECT_LIMIT = 160


def _skill_jaccard(a: Set[int], b: Set[int]) -> float:
    """Return the Jaccard similarity of two skill-id sets (0.0 if either is empty)."""
    if not a or not b:
        return 0.0
    inter = len(a & b)
    union = len(a | b)
    return inter / union if union else 0.0


def _top_weight_keys(weights: Mapping[int, float], limit: int) -> List[int]:
    """Return the keys of the ``limit`` highest-weighted entries, as ints.

    The slice to ``limit`` happens before non-positive keys are dropped, so
    the result may contain fewer than ``limit`` ids.
    """
    if not weights:
        return []
    return [
        int(k)
        for k, _ in sorted(weights.items(), key=lambda x: -float(x[1]))[:limit]
        if int(k) > 0
    ]


def _target_profile_signals(
    target: PlanningTargetProfile,
) -> Tuple[List[int], List[int], List[int]]:
    """Extract the strongest skill, focus-area and style ids from ``target``.

    Skills combine the top 8 ``skill_weights`` with the top 6
    ``skill_gap_weights`` (deduplicated, capped at 12). Focus areas are
    limited to 6 and style directions to 4.

    NOTE(review): ``focus_area_ids`` and ``style_direction_ids`` are handed to
    ``_top_weight_keys`` and therefore must be id -> weight mappings -- confirm
    against ``PlanningTargetProfile``.
    """
    skill_ids = _top_weight_keys(target.skill_weights, 8)
    for sid in _top_weight_keys(target.skill_gap_weights, 6):
        if sid not in skill_ids:
            skill_ids.append(sid)
    focus_ids = _top_weight_keys(target.focus_area_ids, 6)
    style_ids = _top_weight_keys(target.style_direction_ids, 4)
    return skill_ids[:12], focus_ids, style_ids


def _build_pool_sql(ft_select: str, where: Sequence[str], order_by: str) -> str:
    """Assemble the candidate-pool SELECT shared by the profile and fallback pools.

    Placeholder order in the resulting statement: ``ft_select`` (if it has
    one), then the ``where`` fragments in list order, then ``LIMIT``.
    """
    return f"""
        SELECT e.id, e.title, e.summary,
               (
                 SELECT fa.name FROM exercise_focus_areas efa
                 JOIN focus_areas fa ON fa.id = efa.focus_area_id
                 WHERE efa.exercise_id = e.id
                 ORDER BY efa.is_primary DESC NULLS LAST, fa.name ASC
                 LIMIT 1
               ) AS primary_focus_name,
               {ft_select}
        FROM exercises e
        WHERE {' AND '.join(where)}
        ORDER BY {order_by}
        LIMIT %s
    """


def fetch_retrieval_candidate_rows(
    cur,
    *,
    vis_sql: str,
    vis_params: Sequence[Any],
    query: str,
    exercise_kind_any: Optional[List[str]],
    target: PlanningTargetProfile,
    progression_successor_ids: Set[int],
    anchor_skill_ids: Set[int],
    raw_pool_limit: int = _RAW_POOL_LIMIT,
) -> List[Dict[str, Any]]:
    """S1b-0: fetch the profile-guided candidate pool.

    Non-archived exercises matching ``vis_sql`` (and the optional kind filter)
    are kept when at least one profile signal matches: a target skill, focus
    area or style direction, a progression successor id, or a full-text hit
    for ``query``. If no signal exists the pool is unrestricted. If signals
    exist but match nothing, the broad fallback pool is returned instead.

    Args:
        cur: DB cursor; rows must be convertible with ``dict(row)``.
        vis_sql: Visibility predicate, interpolated verbatim into the WHERE
            clause -- must be a trusted SQL fragment.
        vis_params: Values for the placeholders inside ``vis_sql``.
        query: Normalized search text; empty disables full-text ranking.
        exercise_kind_any: Optional kinds; only ``simple`` / ``combination``
            are honoured.
        target: Planning target profile supplying the signals.
        progression_successor_ids: Exercise ids always eligible for the pool.
        anchor_skill_ids: Fallback skill ids (first 10, sorted) when the
            target yields no skills.
        raw_pool_limit: Maximum number of rows.

    Returns:
        Rows with ``id``, ``title``, ``summary``, ``primary_focus_name`` and
        ``ft_rank``.
    """
    where = [vis_sql, "COALESCE(e.status, '') <> %s"]
    params: List[Any] = []

    if query:
        ft_select = "ts_rank_cd(e.search_vector, plainto_tsquery('german', %s)) AS ft_rank"
        # The ft_rank placeholder lives in the SELECT list, i.e. ahead of every
        # WHERE placeholder, so its value has to be bound first. Binding it
        # after vis_params/"archived" shifts all positional parameters.
        params.append(query)
    else:
        ft_select = "0.0::float AS ft_rank"

    params.extend(vis_params)
    params.append("archived")

    ek_filtered: List[str] = []
    if exercise_kind_any:
        for raw in exercise_kind_any:
            s = str(raw or "").strip().lower()
            if s in ("simple", "combination") and s not in ek_filtered:
                ek_filtered.append(s)
    if ek_filtered:
        ph = ",".join(["%s"] * len(ek_filtered))
        where.append(f"(LOWER(TRIM(COALESCE(e.exercise_kind::text,''))) IN ({ph}))")
        params.extend(ek_filtered)

    skill_ids, focus_ids, style_ids = _target_profile_signals(target)
    if not skill_ids and anchor_skill_ids:
        skill_ids = sorted(anchor_skill_ids)[:10]

    # Each clause appends its parameters immediately so clause order and
    # parameter order stay in lockstep.
    profile_clauses: List[str] = []
    if skill_ids:
        ph = ",".join(["%s"] * len(skill_ids))
        profile_clauses.append(
            f"EXISTS (SELECT 1 FROM exercise_skills es WHERE es.exercise_id = e.id AND es.skill_id IN ({ph}))"
        )
        params.extend(skill_ids)
    if focus_ids:
        ph = ",".join(["%s"] * len(focus_ids))
        profile_clauses.append(
            f"EXISTS (SELECT 1 FROM exercise_focus_areas efa WHERE efa.exercise_id = e.id AND efa.focus_area_id IN ({ph}))"
        )
        params.extend(focus_ids)
    if style_ids:
        ph = ",".join(["%s"] * len(style_ids))
        profile_clauses.append(
            f"EXISTS (SELECT 1 FROM exercise_style_directions esd WHERE esd.exercise_id = e.id AND esd.style_direction_id IN ({ph}))"
        )
        params.extend(style_ids)
    if progression_successor_ids:
        ph = ",".join(["%s"] * len(progression_successor_ids))
        profile_clauses.append(f"e.id IN ({ph})")
        params.extend(sorted(progression_successor_ids))
    if query:
        profile_clauses.append("e.search_vector @@ plainto_tsquery('german', %s)")
        params.append(query)

    use_profile_pool = bool(profile_clauses)
    if use_profile_pool:
        where.append(f"({' OR '.join(profile_clauses)})")

    order_by = "e.updated_at DESC, e.id DESC"
    if query:
        order_by = "ft_rank DESC NULLS LAST, e.updated_at DESC, e.id DESC"

    sql = _build_pool_sql(ft_select, where, order_by)
    params.append(int(raw_pool_limit))
    cur.execute(sql, params)
    rows = [dict(r) for r in cur.fetchall()]

    if rows or not use_profile_pool:
        return rows

    # Profile signals matched nothing: retry without them.
    return _fetch_broad_fallback_pool(
        cur,
        vis_sql=vis_sql,
        vis_params=vis_params,
        query=query,
        ek_filtered=ek_filtered,
        raw_pool_limit=raw_pool_limit,
    )


def _fetch_broad_fallback_pool(
    cur,
    *,
    vis_sql: str,
    vis_params: Sequence[Any],
    query: str,
    ek_filtered: List[str],
    raw_pool_limit: int,
) -> List[Dict[str, Any]]:
    """Fetch visible, non-archived exercises without any profile restriction.

    Used when the profile-guided pool is empty. ``ek_filtered`` must already
    be normalized. With a ``query`` the rows are ordered by full-text rank,
    otherwise by recency.
    """
    fallback_where = [vis_sql, "COALESCE(e.status, '') <> %s"]
    fallback_params: List[Any] = []
    if query:
        ft_fb = "ts_rank_cd(e.search_vector, plainto_tsquery('german', %s)) AS ft_rank"
        fb_order = "ft_rank DESC NULLS LAST, e.updated_at DESC, e.id DESC"
        # SELECT-list placeholder comes first in the statement.
        fallback_params.append(query)
    else:
        ft_fb = "0.0::float AS ft_rank"
        fb_order = "e.updated_at DESC, e.id DESC"

    fallback_params.extend(vis_params)
    fallback_params.append("archived")
    if ek_filtered:
        ph = ",".join(["%s"] * len(ek_filtered))
        fallback_where.append(f"(LOWER(TRIM(COALESCE(e.exercise_kind::text,''))) IN ({ph}))")
        fallback_params.extend(ek_filtered)

    fb_sql = _build_pool_sql(ft_fb, fallback_where, fb_order)
    fallback_params.append(int(raw_pool_limit))
    cur.execute(fb_sql, fallback_params)
    return [dict(r) for r in cur.fetchall()]


def profile_preselect_rows(
    cur,
    rows: Sequence[Dict[str, Any]],
    *,
    target: PlanningTargetProfile,
    intent: str,
    progression_successor_ids: Set[int],
    query: str,
    preselect_limit: int = _PROFILE_PRESELECT_LIMIT,
) -> Tuple[List[Dict[str, Any]], bool]:
    """S1b-1: score the pool against the profile and keep the top K for hybrid scoring.

    Pools no larger than ``preselect_limit`` pass through unchanged. Otherwise
    rows are ranked by profile score (ties by title) and cut to
    ``preselect_limit``. Progression successors and, with a ``query``, rows
    whose ``ft_rank`` reaches at least half of the best rank are appended
    afterwards if they were cut, so the result may exceed the limit.

    Returns:
        ``(rows, applied)`` where ``applied`` is True only when the pool was
        actually reduced by this stage.
    """
    if len(rows) <= preselect_limit:
        return list(rows), False

    cand_ids = [int(r["id"]) for r in rows]
    match_profiles = load_exercise_match_profiles_bulk(cur, cand_ids)

    scored: List[Tuple[float, Dict[str, Any]]] = []
    row_by_id = {int(r["id"]): r for r in rows}
    must_keep: Set[int] = {int(x) for x in progression_successor_ids}

    if query:
        # default guards the degenerate "negative limit, empty pool" call.
        max_ft = max((float(r.get("ft_rank") or 0.0) for r in rows), default=0.0) or 0.0
        if max_ft > 0:
            for r in rows:
                if float(r.get("ft_rank") or 0.0) / max_ft >= 0.5:
                    must_keep.add(int(r["id"]))

    for eid in cand_ids:
        emp = match_profiles.get(eid)
        profile_score = 0.0
        if emp:
            profile_score, _ = score_exercise_against_target(emp, target, intent=intent)
        scored.append((profile_score, row_by_id[eid]))

    scored.sort(key=lambda x: (-x[0], str(x[1].get("title") or "")))
    selected: List[Dict[str, Any]] = []
    seen: Set[int] = set()
    for _, row in scored:
        eid = int(row["id"])
        if eid in seen:
            continue
        seen.add(eid)
        selected.append(row)
        if len(selected) >= preselect_limit:
            break

    # Re-add protected candidates that fell below the cut.
    for eid in must_keep:
        if eid in seen:
            continue
        row = row_by_id.get(eid)
        if row:
            selected.append(row)
            seen.add(eid)

    return selected, True


def hybrid_score_planning_hits(
    cur,
    rows: Sequence[Dict[str, Any]],
    *,
    query: str,
    intent: str,
    intent_weights: Mapping[str, float],
    target: PlanningTargetProfile,
    pack: Mapping[str, Any],
) -> Tuple[List[Dict[str, Any]], Dict[int, Set[int]]]:
    """S1b-2: compute the hybrid score on the preselected pool.

    Per candidate the score is the ``intent_weights``-weighted sum of:
    normalized full-text rank, progression-successor hit, skill Jaccard
    against the anchor skills, skill Jaccard against the last planned
    exercise, profile score, and the two repetition flags (already planned in
    this unit / recently used in the group). The anchor exercise itself is
    excluded from the hits.

    ``intent_weights`` must provide the keys ``fulltext``, ``progression``,
    ``skill``, ``plan``, ``profile``, ``repeat_unit`` and ``repeat_group``.

    Returns:
        ``(hits, skills_by_ex)``: hits are dicts with ``id``, ``title``,
        ``summary``, ``focus_area``, ``score`` (clamped to [0, 1], 4 decimals)
        and ``reasons``, sorted by score descending then title;
        ``skills_by_ex`` maps every candidate id to its skill ids.
    """
    planned_set = set(pack.get("planned_exercise_ids") or [])
    group_recent_set = set(pack.get("group_recent_exercise_ids") or [])
    progression_set = set(pack.get("progression_successor_ids") or [])
    anchor_skills = set(pack.get("anchor_skill_ids") or [])
    anchor_id = pack.get("anchor_exercise_id")
    progression_notes = pack.get("progression_edge_notes") or {}

    last_planned_skills: Set[int] = set()
    planned_ids = pack.get("planned_exercise_ids") or []
    if planned_ids:
        cur.execute(
            "SELECT skill_id FROM exercise_skills WHERE exercise_id = %s",
            (int(planned_ids[-1]),),
        )
        last_planned_skills = {int(r["skill_id"]) for r in cur.fetchall() if r.get("skill_id")}

    # Skill ids and match profiles per candidate, loaded in bulk.
    cand_ids = [int(r["id"]) for r in rows]
    skills_by_ex: Dict[int, Set[int]] = {cid: set() for cid in cand_ids}
    match_profiles = load_exercise_match_profiles_bulk(cur, cand_ids)
    if cand_ids:
        ph = ",".join(["%s"] * len(cand_ids))
        cur.execute(
            f"SELECT exercise_id, skill_id FROM exercise_skills WHERE exercise_id IN ({ph})",
            cand_ids,
        )
        for r in cur.fetchall():
            skills_by_ex.setdefault(int(r["exercise_id"]), set()).add(int(r["skill_id"]))

    # First pass: drop the anchor and find the maximum rank for normalization.
    max_ft = 0.0
    scored_items: List[Dict[str, Any]] = []
    for row in rows:
        eid = int(row["id"])
        if anchor_id and eid == int(anchor_id):
            continue
        ft = float(row.get("ft_rank") or 0.0)
        if ft > max_ft:
            max_ft = ft
        scored_items.append(
            {
                "row": row,
                "eid": eid,
                "ft": ft,
                "skills": skills_by_ex.get(eid, set()),
            }
        )

    weights = dict(intent_weights)
    hits: List[Dict[str, Any]] = []
    for item in scored_items:
        eid = item["eid"]
        row = item["row"]
        ft_norm = (item["ft"] / max_ft) if max_ft > 0 else 0.0
        prog_hit = 1.0 if eid in progression_set else 0.0
        skill_sim = _skill_jaccard(anchor_skills, item["skills"]) if anchor_skills else 0.0
        plan_aff = 0.0
        if last_planned_skills and item["skills"]:
            plan_aff = _skill_jaccard(last_planned_skills, item["skills"])
        repeat_unit = 1.0 if eid in planned_set else 0.0
        repeat_group = 1.0 if eid in group_recent_set else 0.0
        profile_score = 0.0
        profile_reasons: List[str] = []
        emp = match_profiles.get(eid)
        if emp:
            profile_score, profile_reasons = score_exercise_against_target(
                emp, target, intent=intent
            )

        score = (
            weights["fulltext"] * ft_norm
            + weights["progression"] * prog_hit
            + weights["skill"] * skill_sim
            + weights["plan"] * plan_aff
            + weights["profile"] * profile_score
            + weights["repeat_unit"] * repeat_unit
            + weights["repeat_group"] * repeat_group
        )

        # Reason strings are user-facing (German) and must stay as they are.
        reasons: List[str] = []
        if query and ft_norm >= 0.35:
            reasons.append("Volltext-Treffer")
        if prog_hit > 0:
            note = progression_notes.get(eid)
            reasons.append(
                f"Nachfolger im Progressionsgraph{f': {note}' if note else ''}"
            )
        if skill_sim >= 0.2 and anchor_id:
            reasons.append("Fähigkeiten passen zur Anker-Übung")
        if plan_aff >= 0.25:
            reasons.append("Schließt an Skills der letzten geplanten Übung an")
        if repeat_unit > 0:
            reasons.append("Bereits in dieser Einheit eingeplant")
        if repeat_group > 0 and repeat_unit <= 0:
            reasons.append("Kürzlich in der Gruppe verwendet")
        for pr in profile_reasons:
            if pr not in reasons:
                reasons.append(pr)

        if score <= 0 and not reasons and not query:
            # Empty query: still surface weak candidates that carry some
            # skill/progression/profile signal.
            if prog_hit or skill_sim or plan_aff or profile_score:
                score = 0.05 + prog_hit * 0.3 + skill_sim * 0.2 + profile_score * 0.25

        hits.append(
            {
                "id": eid,
                "title": row.get("title"),
                "summary": row.get("summary"),
                "focus_area": row.get("primary_focus_name"),
                "score": round(max(0.0, min(1.0, score)), 4),
                "reasons": reasons,
            }
        )

    hits.sort(key=lambda h: (-h["score"], h.get("title") or ""))
    return hits, skills_by_ex


def run_multistage_planning_retrieval(
    cur,
    *,
    vis_sql: str,
    vis_params: Sequence[Any],
    query: str,
    exercise_kind_any: Optional[List[str]],
    target: PlanningTargetProfile,
    intent: str,
    intent_weights: Mapping[str, float],
    pack: Mapping[str, Any],
) -> Tuple[List[Dict[str, Any]], Dict[int, Set[int]], bool]:
    """Orchestrate S1b-0 -> S1b-1 -> S1b-2.

    Returns:
        ``(hits, skills_by_ex, preselect_applied)`` -- the scored hits and
        skill map from ``hybrid_score_planning_hits`` plus the flag from
        ``profile_preselect_rows`` telling whether the pool was reduced.
    """
    progression_set = set(pack.get("progression_successor_ids") or [])
    anchor_skills = set(pack.get("anchor_skill_ids") or [])

    rows = fetch_retrieval_candidate_rows(
        cur,
        vis_sql=vis_sql,
        vis_params=vis_params,
        query=query,
        exercise_kind_any=exercise_kind_any,
        target=target,
        progression_successor_ids=progression_set,
        anchor_skill_ids=anchor_skills,
    )
    rows, preselect_applied = profile_preselect_rows(
        cur,
        rows,
        target=target,
        intent=intent,
        progression_successor_ids=progression_set,
        query=query,
    )
    hits, skills_by_ex = hybrid_score_planning_hits(
        cur,
        rows,
        query=query,
        intent=intent,
        intent_weights=intent_weights,
        target=target,
        pack=pack,
    )
    return hits, skills_by_ex, preselect_applied


__all__ = [
    "fetch_retrieval_candidate_rows",
    "hybrid_score_planning_hits",
    "profile_preselect_rows",
    "run_multistage_planning_retrieval",
]

# Patch text that followed this module on the same physical line (start of the
# next file's diff), kept so no patch content is lost:
#   diff --git a/backend/planning_exercise_suggest.py b/backend/planning_exercise_suggest.py
#   index 8f5617b..c9f0277 100644
#   --- a/backend/planning_exercise_suggest.py
#   +++ b/backend/planning_exercise_suggest.py
#   @@ -12,14 +12,12 @@ from fastapi import HTTPException
#    from pydantic import BaseModel, Field
#    from tenant_context import TenantContext, library_content_visibility_sql
#   -from planning_exercise_profiles import (
#   -    load_exercise_match_profiles_bulk,
#   -    score_exercise_against_target,
#   -)
#   +from planning_exercise_retrieval import run_multistage_planning_retrieval
#    from planning_exercise_llm_rank import try_llm_rerank_planning_hits
#    from planning_exercise_target_pipeline import (
#        build_planning_target_with_query_pipeline,
#        compose_retrieval_phase,
#   +    should_run_llm_rank_pipeline,
#    )
#    # Planungs-Berechtigung + Sektionen (bestehende Implementierung)
#   @@ -43,7 +41,7 @@ VALID_INTENTS = {
#        INTENT_FREE_SEARCH,
#    }
#   -_CANDIDATE_POOL_LIMIT = 400
#   +
#    _LLM_RERANK_PRE_LIMIT = 32
#   @@ -257,14 +255,6 @@ def _normalize_query(query:
Optional[str]) -> str: return re.sub(r"\s+", " ", (query or "").strip()) -def _skill_jaccard(a: Set[int], b: Set[int]) -> float: - if not a or not b: - return 0.0 - inter = len(a & b) - union = len(a | b) - return inter / union if union else 0.0 - - def _apply_client_planned_override( cur, pack: Dict[str, Any], @@ -493,162 +483,37 @@ def suggest_planning_exercises( effective_club_id=tenant.effective_club_id, ) - where = [vis_sql, "COALESCE(e.status, '') <> %s"] - params: List[Any] = [] - if query: - ft_select = "ts_rank_cd(e.search_vector, plainto_tsquery('german', %s)) AS ft_rank" - params.append(query) - else: - ft_select = "0.0::float AS ft_rank" - - params.extend(list(vis_params)) - params.append("archived") - - ek_filtered: List[str] = [] - if body.exercise_kind_any: - for raw in body.exercise_kind_any: - s = str(raw or "").strip().lower() - if s in ("simple", "combination") and s not in ek_filtered: - ek_filtered.append(s) - if ek_filtered: - ph = ",".join(["%s"] * len(ek_filtered)) - where.append(f"(LOWER(TRIM(COALESCE(e.exercise_kind::text,''))) IN ({ph}))") - params.extend(ek_filtered) - - sql = f""" - SELECT e.id, e.title, e.summary, - ( - SELECT fa.name FROM exercise_focus_areas efa - JOIN focus_areas fa ON fa.id = efa.focus_area_id - WHERE efa.exercise_id = e.id - ORDER BY efa.is_primary DESC NULLS LAST, fa.name ASC - LIMIT 1 - ) AS primary_focus_name, - {ft_select} - FROM exercises e - WHERE {' AND '.join(where)} - ORDER BY e.updated_at DESC, e.id DESC - LIMIT %s - """ - params.append(_CANDIDATE_POOL_LIMIT) - cur.execute(sql, params) - rows = cur.fetchall() + hits, skills_by_ex, profile_preselect_applied = run_multistage_planning_retrieval( + cur, + vis_sql=vis_sql, + vis_params=vis_params, + query=query, + exercise_kind_any=body.exercise_kind_any, + target=target_profile, + intent=intent, + intent_weights=weights, + pack=pack, + ) planned_set = set(pack["planned_exercise_ids"]) - group_recent_set = set(pack["group_recent_exercise_ids"]) - 
progression_set = set(pack["progression_successor_ids"]) - anchor_skills = set(pack["anchor_skill_ids"]) - anchor_id = pack.get("anchor_exercise_id") - progression_notes = pack.get("progression_edge_notes") or {} - last_planned_skills: Set[int] = set() - if pack["planned_exercise_ids"]: - last_planned_skills = _load_skill_ids_for_exercise(cur, pack["planned_exercise_ids"][-1]) - # Skill-IDs + ExerciseMatchProfile pro Kandidat (Batch) - cand_ids = [int(r["id"]) for r in rows] - skills_by_ex: Dict[int, Set[int]] = {cid: set() for cid in cand_ids} - match_profiles = load_exercise_match_profiles_bulk(cur, cand_ids) - if cand_ids: - ph = ",".join(["%s"] * len(cand_ids)) - cur.execute( - f"SELECT exercise_id, skill_id FROM exercise_skills WHERE exercise_id IN ({ph})", - cand_ids, - ) - for r in cur.fetchall(): - skills_by_ex.setdefault(int(r["exercise_id"]), set()).add(int(r["skill_id"])) - - max_ft = 0.0 - scored: List[Dict[str, Any]] = [] - for row in rows: - eid = int(row["id"]) - if anchor_id and eid == int(anchor_id): - continue - ft = float(row.get("ft_rank") or 0.0) - if ft > max_ft: - max_ft = ft - scored.append( - { - "row": row, - "eid": eid, - "ft": ft, - "skills": skills_by_ex.get(eid, set()), - } - ) - - hits: List[Dict[str, Any]] = [] - for item in scored: - eid = item["eid"] - row = item["row"] - ft_norm = (item["ft"] / max_ft) if max_ft > 0 else 0.0 - prog_hit = 1.0 if eid in progression_set else 0.0 - skill_sim = _skill_jaccard(anchor_skills, item["skills"]) if anchor_skills else 0.0 - plan_aff = 0.0 - if last_planned_skills and item["skills"]: - plan_aff = _skill_jaccard(last_planned_skills, item["skills"]) - repeat_unit = 1.0 if eid in planned_set else 0.0 - repeat_group = 1.0 if eid in group_recent_set else 0.0 - profile_score = 0.0 - profile_reasons: List[str] = [] - emp = match_profiles.get(eid) - if emp: - profile_score, profile_reasons = score_exercise_against_target( - emp, target_profile, intent=intent - ) - - score = ( - weights["fulltext"] * 
ft_norm - + weights["progression"] * prog_hit - + weights["skill"] * skill_sim - + weights["plan"] * plan_aff - + weights["profile"] * profile_score - + weights["repeat_unit"] * repeat_unit - + weights["repeat_group"] * repeat_group - ) - - reasons: List[str] = [] - if query and ft_norm >= 0.35: - reasons.append("Volltext-Treffer") - if prog_hit > 0: - note = progression_notes.get(eid) - reasons.append( - f"Nachfolger im Progressionsgraph{f': {note}' if note else ''}" - ) - if skill_sim >= 0.2 and anchor_id: - reasons.append("Fähigkeiten passen zur Anker-Übung") - if plan_aff >= 0.25: - reasons.append("Schließt an Skills der letzten geplanten Übung an") - if repeat_unit > 0: - reasons.append("Bereits in dieser Einheit eingeplant") - if repeat_group > 0 and repeat_unit <= 0: - reasons.append("Kürzlich in der Gruppe verwendet") - for pr in profile_reasons: - if pr not in reasons: - reasons.append(pr) - - if score <= 0 and not reasons and not query: - # Leere Query: trotzdem schwache Kandidaten mit Skill/Progression - if prog_hit or skill_sim or plan_aff or profile_score: - score = 0.05 + prog_hit * 0.3 + skill_sim * 0.2 + profile_score * 0.25 - - hits.append( - { - "id": eid, - "title": row.get("title"), - "summary": row.get("summary"), - "focus_area": row.get("primary_focus_name"), - "score": round(max(0.0, min(1.0, score)), 4), - "reasons": reasons, - } - ) - - hits.sort(key=lambda h: (-h["score"], h.get("title") or "")) - - llm_applied = False - retrieval_phase = compose_retrieval_phase(query_intent=query_intent_applied, llm_rank=False) - if body.include_llm_rank: + llm_rank_applied = False + retrieval_phase = compose_retrieval_phase( + profile_preselect=profile_preselect_applied, + query_intent=query_intent_applied, + llm_rank=False, + ) + run_llm_rank = should_run_llm_rank_pipeline( + query, + scenario_kind, + include_llm_rank=body.include_llm_rank, + query_intent_applied=query_intent_applied, + hits=hits, + ) + if run_llm_rank: pre_limit = max(int(body.limit), 
_LLM_RERANK_PRE_LIMIT) pool_hits = hits[:pre_limit] - pool_hits, llm_applied = try_llm_rerank_planning_hits( + pool_hits, llm_rank_applied = try_llm_rerank_planning_hits( cur, hits=pool_hits, skills_by_ex=skills_by_ex, @@ -665,8 +530,9 @@ def suggest_planning_exercises( target_profile_summary=target_profile_summary, limit=int(body.limit), ) - if llm_applied: + if llm_rank_applied: retrieval_phase = compose_retrieval_phase( + profile_preselect=profile_preselect_applied, query_intent=query_intent_applied, llm_rank=True, ) @@ -696,7 +562,9 @@ def suggest_planning_exercises( "scenario_kind": scenario_kind, "query_intent_summary": query_intent_summary, "retrieval_phase": retrieval_phase, - "llm_rank_applied": llm_applied, + "profile_preselect_applied": profile_preselect_applied, + "llm_rank_applied": llm_rank_applied, + "llm_intent_applied": query_intent_applied, "intent_resolved": intent, "intent_heuristic": heuristic_intent, "query_normalized": query or None, diff --git a/backend/planning_exercise_target_pipeline.py b/backend/planning_exercise_target_pipeline.py index f76763b..6abce68 100644 --- a/backend/planning_exercise_target_pipeline.py +++ b/backend/planning_exercise_target_pipeline.py @@ -99,7 +99,52 @@ def should_run_llm_intent_pipeline( return False if scenario == SCENARIO_PRESET_NEXT: return False - return bool(_normalize_query(query)) + q = _normalize_query(query) + if not q: + return False + # Kurze Stichwortsuche: Volltext + Profil reichen — kein Intent-LLM + if scenario == SCENARIO_FREE_SEARCH and len(q) < 14: + return False + if scenario in (SCENARIO_CONTINUE_PLAN, SCENARIO_PROGRESSION) and len(q) < 18: + return False + return True + + +def deterministic_rank_confident(hits: Sequence[Mapping[str, Any]], *, gap_threshold: float = 0.12) -> bool: + """True wenn Hybrid-Ranking schon klar genug ist — LLM-Rerank sparen.""" + if len(hits) < 4: + return True + top = float(hits[0].get("score") or 0.0) + fourth = float(hits[3].get("score") or 0.0) + return (top - 
fourth) >= gap_threshold + + +def should_run_llm_rank_pipeline( + query: Optional[str], + scenario: str, + *, + include_llm_rank: bool, + query_intent_applied: bool, + hits: Sequence[Mapping[str, Any]], +) -> bool: + """ + Maximal ein LLM-Call pro Request: wenn Intent-LLM lief, kein Rerank. + Rerank nur bei längerer, komplexer Anfrage und unklarem Hybrid-Ranking. + """ + if not include_llm_rank: + return False + if query_intent_applied: + return False + if scenario == SCENARIO_PRESET_NEXT: + return False + q = _normalize_query(query) + if not q: + return False + if scenario == SCENARIO_ADDITIVE: + return len(q) >= 12 and not deterministic_rank_confident(hits) + if len(q) < 22: + return False + return not deterministic_rank_confident(hits) def _recalculate_skill_gap(target: PlanningTargetProfile) -> PlanningTargetProfile: @@ -263,8 +308,15 @@ VALID_SCENARIOS_SET = { } -def compose_retrieval_phase(*, query_intent: bool, llm_rank: bool) -> str: +def compose_retrieval_phase( + *, + profile_preselect: bool = False, + query_intent: bool = False, + llm_rank: bool = False, +) -> str: parts = ["profile_v1"] + if profile_preselect: + parts.append("profile_preselect") if query_intent: parts.append("query_intent") if llm_rank: @@ -281,4 +333,6 @@ __all__ = [ "is_simple_preset_query", "merge_query_overlay_into_target", "should_run_llm_intent_pipeline", + "should_run_llm_rank_pipeline", + "deterministic_rank_confident", ] diff --git a/backend/tests/test_planning_exercise_suggest.py b/backend/tests/test_planning_exercise_suggest.py index bd0ee25..6daf854 100644 --- a/backend/tests/test_planning_exercise_suggest.py +++ b/backend/tests/test_planning_exercise_suggest.py @@ -44,11 +44,40 @@ def test_should_skip_llm_for_preset(): ) +def test_should_skip_llm_intent_short_free_search(): + from planning_exercise_target_pipeline import SCENARIO_FREE_SEARCH, should_run_llm_intent_pipeline + + assert not should_run_llm_intent_pipeline( + "Partnerübung", + SCENARIO_FREE_SEARCH, + 
include_llm_intent=True, + ) + + +def test_should_skip_llm_rank_when_intent_already_applied(): + from planning_exercise_target_pipeline import SCENARIO_ADDITIVE, should_run_llm_rank_pipeline + + hits = [{"score": 0.5}, {"score": 0.48}, {"score": 0.47}, {"score": 0.46}] + assert not should_run_llm_rank_pipeline( + "Baut auf dem Plan auf und trainiert zusätzlich Schnellkraft mit Partner", + SCENARIO_ADDITIVE, + include_llm_rank=True, + query_intent_applied=True, + hits=hits, + ) + + def test_compose_retrieval_phase(): assert compose_retrieval_phase(query_intent=False, llm_rank=False) == "profile_v1" assert compose_retrieval_phase(query_intent=True, llm_rank=True) == "profile_v1+query_intent+llm_rank" + assert ( + compose_retrieval_phase(profile_preselect=True, query_intent=True, llm_rank=False) + == "profile_v1+profile_preselect+query_intent" + ) + + def test_parse_planning_query_intent_response(): parsed = parse_planning_query_intent_response( '{"intent":"continue_plan_goal","scenario":"additive_constraint",' diff --git a/backend/version.py b/backend/version.py index 4808aa8..410f26a 100644 --- a/backend/version.py +++ b/backend/version.py @@ -1,6 +1,6 @@ # Shinkan Jinkendo Version Information -APP_VERSION = "0.8.172" +APP_VERSION = "0.8.173" BUILD_DATE = "2026-05-22" DB_SCHEMA_VERSION = "20260531073" @@ -28,7 +28,7 @@ MODULE_VERSIONS = { "skill_profiles": "1.0.0", # Phase 3: gewichtetes Fähigkeiten-Profil + skill-discovery/suggestions "methods": "0.1.0", "exercises": "2.37.0", # Planungs-KI P1: Szenario-Pipeline + Query-Intent-Overlay - "planning_exercise_suggest": "0.4.1", # unit_id optional; client_free Kontext; group_id + "planning_exercise_suggest": "0.5.0", # Mehrstufiges Profil-Retrieval; LLM-Gates (max 1 Call) "training_units": "0.4.0", # POST .../publish-to-framework: Ablauf aus geplanter Einheit → Rahmen-Slot-Blueprint "training_programs": "0.1.0", "planning": "0.15.0", # Vorlagen: Strukturvorschau, Bearbeiten inkl. 
Split-Sessions + Beschreibung @@ -43,6 +43,14 @@ MODULE_VERSIONS = { } CHANGELOG = [ + { + "version": "0.8.173", + "date": "2026-05-22", + "changes": [ + "Planungs-KI: mehrstufiges Profil-Retrieval (Pool → Profil-Vorselektion → Hybrid); LLM max. 1 Call pro Suche.", + "LLM-Gates: kein Intent bei Kurz-Stichwort/preset; kein Rerank wenn Intent-LLM lief oder Ranking klar.", + ], + }, { "version": "0.8.172", "date": "2026-05-22", diff --git a/frontend/src/components/ExercisePickerModal.jsx b/frontend/src/components/ExercisePickerModal.jsx index 65f849e..333737c 100644 --- a/frontend/src/components/ExercisePickerModal.jsx +++ b/frontend/src/components/ExercisePickerModal.jsx @@ -29,6 +29,9 @@ import { const PAGE_SIZE = 100 /** Backend POST /api/planning/exercise-suggest erlaubt max. 50 */ const PLANNING_SUGGEST_LIMIT = 50 +/** Client-Hinweis — Backend entscheidet final über LLM-Gates (max. 1 Call). */ +const PLANNING_LLM_INTENT_MIN_CHARS = 10 +const PLANNING_LLM_RANK_MIN_CHARS = 24 const LEVEL_FILTER_OPTS = SKILL_LEVEL_OPTIONS.filter((o) => o.level != null) const INITIAL_FILTERS = { ...INITIAL_EXERCISE_LIST_FILTERS } @@ -79,8 +82,13 @@ export default function ExercisePickerModal({ const [planningContextSummary, setPlanningContextSummary] = useState(null) const [planningTargetProfileSummary, setPlanningTargetProfileSummary] = useState(null) const [planningLlmRankApplied, setPlanningLlmRankApplied] = useState(false) + const [planningLlmIntentApplied, setPlanningLlmIntentApplied] = useState(false) + const [planningRetrievalPhase, setPlanningRetrievalPhase] = useState('') const [planningQueryIntentSummary, setPlanningQueryIntentSummary] = useState(null) const [planningIntentResolved, setPlanningIntentResolved] = useState(null) + const [planningHasSearched, setPlanningHasSearched] = useState(false) + const [planningSubmittedQuery, setPlanningSubmittedQuery] = useState('') + const [planningSearchTick, setPlanningSearchTick] = useState(0) const pickerScrollRef = useRef(null) 
const resolvedPlanningUnitId = useMemo(() => { @@ -124,13 +132,24 @@ export default function ExercisePickerModal({ !enableFreePlanningSearch ) - /** Gemeinsamer Suchtext — in Planung nur ein Feld; in Bibliothek beide Felder kombiniert. */ + /** Gemeinsamer Suchtext — Planung: nur nach Button; Bibliothek: debounced live. */ const effectivePickerQuery = useMemo(() => { if (usePlanningSearch) { - return (debouncedSearch || debouncedAi).trim() + return planningSubmittedQuery } return [debouncedSearch, debouncedAi].filter(Boolean).join(' ').trim() - }, [usePlanningSearch, debouncedSearch, debouncedAi]) + }, [usePlanningSearch, planningSubmittedQuery, debouncedSearch, debouncedAi]) + + const submitPlanningSearch = useCallback((queryOverride) => { + const q = + queryOverride !== undefined && queryOverride !== null + ? String(queryOverride).trim() + : (searchInput || aiSearchInput).trim() + setPlanningSubmittedQuery(q) + setPlanningHasSearched(true) + setList([]) + setPlanningSearchTick((t) => t + 1) + }, [searchInput, aiSearchInput]) const { title: quickTitle, @@ -163,7 +182,8 @@ export default function ExercisePickerModal({ catalogsReady && !loading && list.length === 0 && - (usePlanningSearch || effectivePickerQuery.length >= 3) + planningHasSearched && + (usePlanningSearch ? 
true : effectivePickerQuery.length >= 3) useEffect(() => { if (!open) return @@ -215,8 +235,13 @@ export default function ExercisePickerModal({ setPlanningContextSummary(null) setPlanningTargetProfileSummary(null) setPlanningLlmRankApplied(false) + setPlanningLlmIntentApplied(false) + setPlanningRetrievalPhase('') setPlanningQueryIntentSummary(null) setPlanningIntentResolved(null) + setPlanningHasSearched(false) + setPlanningSubmittedQuery('') + setPlanningSearchTick(0) return } setFilters(mergeExerciseListPrefsFromApi(user?.exercise_list_prefs)) @@ -311,14 +336,46 @@ export default function ExercisePickerModal({ return q }, [filters, effectivePickerQuery, exerciseKindAny]) - const reload = useCallback(async () => { - if (!open || !catalogsReady) return - if (planningSearchBlocked) { + const reloadLibrary = useCallback(async () => { + if (!open || !catalogsReady || usePlanningSearch) return + setLoading(true) + try { + setPlanningContextSummary(null) + setPlanningTargetProfileSummary(null) + setPlanningLlmRankApplied(false) + setPlanningLlmIntentApplied(false) + setPlanningRetrievalPhase('') + setPlanningQueryIntentSummary(null) + setPlanningIntentResolved(null) + const batch = await api.listExercises({ + ...queryBase, + include_archived: true, + include_variants: true, + limit: PAGE_SIZE, + offset: 0, + }) + setList(Array.isArray(batch) ? 
batch : []) + setHasMore(batch?.length === PAGE_SIZE) + } catch (e) { + console.error(e) + alert(e.message || 'Laden fehlgeschlagen') + setList([]) + setHasMore(false) + } finally { + setLoading(false) + } + }, [open, catalogsReady, usePlanningSearch, queryBase]) + + const reloadPlanning = useCallback(async () => { + if (!open || !catalogsReady || !usePlanningSearch || planningSearchTick === 0) return + if (planningSearchBlocked || !activePlanningContext) { setList([]) setHasMore(false) setPlanningContextSummary(null) setPlanningTargetProfileSummary(null) setPlanningLlmRankApplied(false) + setPlanningLlmIntentApplied(false) + setPlanningRetrievalPhase('') setPlanningQueryIntentSummary(null) setPlanningIntentResolved(null) setLoading(false) @@ -326,83 +383,69 @@ export default function ExercisePickerModal({ } setLoading(true) try { - if (usePlanningSearch && activePlanningContext) { - const query = effectivePickerQuery - const requestBody = { - section_order_index: - activePlanningContext.sectionOrderIndex != null - ? Number(activePlanningContext.sectionOrderIndex) - : null, - phase_order_index: - activePlanningContext.phaseOrderIndex != null - ? Number(activePlanningContext.phaseOrderIndex) - : null, - parallel_stream_order_index: - activePlanningContext.parallelStreamOrderIndex != null - ? Number(activePlanningContext.parallelStreamOrderIndex) - : null, - anchor_exercise_id: - activePlanningContext.anchorExerciseId != null - ? Number(activePlanningContext.anchorExerciseId) - : null, - progression_graph_id: - activePlanningContext.progressionGraphId != null - ? Number(activePlanningContext.progressionGraphId) - : null, - planned_exercise_ids: - Array.isArray(activePlanningContext.plannedExerciseIds) && - activePlanningContext.plannedExerciseIds.length > 0 - ? 
activePlanningContext.plannedExerciseIds - .map((x) => Number(x)) - .filter((x) => Number.isFinite(x) && x > 0) - : undefined, - include_llm_intent: Boolean(query), - include_llm_rank: Boolean(query), - query, - intent_hint: activePlanningContext.intentHint || (useFreePlanningSearch && query ? 'free_search' : null), - limit: PLANNING_SUGGEST_LIMIT, - exercise_kind_any: - Array.isArray(exerciseKindAny) && exerciseKindAny.length > 0 ? exerciseKindAny : undefined, - } - if (resolvedPlanningUnitId) { - requestBody.unit_id = Number(resolvedPlanningUnitId) - } - if (activePlanningContext.groupId) { - requestBody.group_id = Number(activePlanningContext.groupId) - } - const res = await api.suggestPlanningExercises(requestBody) - setPlanningContextSummary(res?.context_summary || null) - setPlanningTargetProfileSummary(res?.target_profile_summary || null) - setPlanningLlmRankApplied(Boolean(res?.llm_rank_applied)) - setPlanningQueryIntentSummary(res?.query_intent_summary || null) - setPlanningIntentResolved(res?.intent_resolved || null) - const hits = (Array.isArray(res?.hits) ? res.hits : []).map((h) => ({ - id: h.id, - title: h.title, - summary: h.summary, - focus_area: h.focus_area, - _planningScore: h.score, - _planningReasons: Array.isArray(h.reasons) ? h.reasons : [], - updated_at: new Date().toISOString(), - })) - setList(hits) - setHasMore(false) - } else { - setPlanningContextSummary(null) - setPlanningTargetProfileSummary(null) - setPlanningLlmRankApplied(false) - setPlanningQueryIntentSummary(null) - setPlanningIntentResolved(null) - const batch = await api.listExercises({ - ...queryBase, - include_archived: true, - include_variants: true, - limit: PAGE_SIZE, - offset: 0, - }) - setList(Array.isArray(batch) ? batch : []) - setHasMore(batch?.length === PAGE_SIZE) + const query = planningSubmittedQuery + const requestBody = { + section_order_index: + activePlanningContext.sectionOrderIndex != null + ? 
Number(activePlanningContext.sectionOrderIndex) + : null, + phase_order_index: + activePlanningContext.phaseOrderIndex != null + ? Number(activePlanningContext.phaseOrderIndex) + : null, + parallel_stream_order_index: + activePlanningContext.parallelStreamOrderIndex != null + ? Number(activePlanningContext.parallelStreamOrderIndex) + : null, + anchor_exercise_id: + activePlanningContext.anchorExerciseId != null + ? Number(activePlanningContext.anchorExerciseId) + : null, + progression_graph_id: + activePlanningContext.progressionGraphId != null + ? Number(activePlanningContext.progressionGraphId) + : null, + planned_exercise_ids: + Array.isArray(activePlanningContext.plannedExerciseIds) && + activePlanningContext.plannedExerciseIds.length > 0 + ? activePlanningContext.plannedExerciseIds + .map((x) => Number(x)) + .filter((x) => Number.isFinite(x) && x > 0) + : undefined, + include_llm_intent: query.length >= PLANNING_LLM_INTENT_MIN_CHARS, + include_llm_rank: query.length >= PLANNING_LLM_RANK_MIN_CHARS, + query, + intent_hint: + activePlanningContext.intentHint || (useFreePlanningSearch && query ? 'free_search' : null), + limit: PLANNING_SUGGEST_LIMIT, + exercise_kind_any: + Array.isArray(exerciseKindAny) && exerciseKindAny.length > 0 ? 
exerciseKindAny : undefined, } + if (resolvedPlanningUnitId) { + requestBody.unit_id = Number(resolvedPlanningUnitId) + } + if (activePlanningContext.groupId) { + requestBody.group_id = Number(activePlanningContext.groupId) + } + const res = await api.suggestPlanningExercises(requestBody) + setPlanningContextSummary(res?.context_summary || null) + setPlanningTargetProfileSummary(res?.target_profile_summary || null) + setPlanningLlmRankApplied(Boolean(res?.llm_rank_applied)) + setPlanningLlmIntentApplied(Boolean(res?.llm_intent_applied)) + setPlanningRetrievalPhase(res?.retrieval_phase || '') + setPlanningQueryIntentSummary(res?.query_intent_summary || null) + setPlanningIntentResolved(res?.intent_resolved || null) + const hits = (Array.isArray(res?.hits) ? res.hits : []).map((h) => ({ + id: h.id, + title: h.title, + summary: h.summary, + focus_area: h.focus_area, + _planningScore: h.score, + _planningReasons: Array.isArray(h.reasons) ? h.reasons : [], + updated_at: new Date().toISOString(), + })) + setList(hits) + setHasMore(false) } catch (e) { console.error(e) alert(e.message || 'Laden fehlgeschlagen') @@ -411,6 +454,8 @@ export default function ExercisePickerModal({ setPlanningContextSummary(null) setPlanningTargetProfileSummary(null) setPlanningLlmRankApplied(false) + setPlanningLlmIntentApplied(false) + setPlanningRetrievalPhase('') setPlanningQueryIntentSummary(null) setPlanningIntentResolved(null) } finally { @@ -419,19 +464,23 @@ export default function ExercisePickerModal({ }, [ open, catalogsReady, - queryBase, usePlanningSearch, + planningSearchTick, planningSearchBlocked, activePlanningContext, - effectivePickerQuery, + planningSubmittedQuery, exerciseKindAny, resolvedPlanningUnitId, useFreePlanningSearch, ]) useEffect(() => { - reload() - }, [reload]) + reloadLibrary() + }, [reloadLibrary]) + + useEffect(() => { + reloadPlanning() + }, [reloadPlanning]) const loadMore = async () => { if (!hasMore || loadingMore || loading) return @@ -655,7 +704,11 @@ 
export default function ExercisePickerModal({ ? ` · ${String(planningQueryIntentSummary.scenario).replace(/_/g, ' ')}` : null} {planningLlmRankApplied ? ' · KI-Ranking aktiv' : null} - {planningQueryIntentSummary?.llm_applied ? ' · KI-Intent aktiv' : null} + {planningLlmIntentApplied ? ' · KI-Intent aktiv' : null} + {!planningLlmRankApplied && !planningLlmIntentApplied && usePlanningSearch + ? ' · ohne LLM (Profil/Hybrid)' + : null} + {planningRetrievalPhase ? ` · ${planningRetrievalPhase}` : null}
) : null} @@ -729,20 +782,51 @@ export default function ExercisePickerModal({ {usePlanningSearch ? (- Leer lassen = nächste Übung aus Planungskontext. Mit Text = KI-Intent + Profil + Ranking. + Suche startet erst per Button (oder Enter) — nicht beim Tippen. LLM nur bei längeren Anfragen, + maximal ein KI-Call pro Suche.
+
{usePlanningSearch - ? effectivePickerQuery - ? 'Keine KI-Vorschläge für diese Anfrage.' - : 'Keine Vorschläge — Einheit speichern und Planungskontext prüfen, oder Anfrage eingeben.' + ? !planningHasSearched + ? 'Anfrage formulieren und „Vorschläge laden“ klicken — oder „Nächste aus Kontext“ ohne Freitext.' + : effectivePickerQuery + ? 'Keine KI-Vorschläge für diese Anfrage.' + : 'Keine Vorschläge aus dem Planungskontext — Anker, Plan oder Profil prüfen.' : effectivePickerQuery.length >= 3 ? 'Keine Treffer.' : 'Suchbegriff eingeben (mind. 3 Zeichen) …'}