Implement Learning Goal Candidate Retrieval and Roadmap Fallback Logic

- Added `_safe_tsquery_fragment` to sanitize learning-goal input before it is used in full-text search queries, improving query safety.
- Introduced `_fetch_learning_goal_library_candidate_ids` to retrieve the IDs of exercises that match a learning goal, enhancing exercise relevance in roadmap suggestions.
- Enhanced `_match_roadmap_slot` to use the learning-goal candidates, improving the accuracy of supplemental exercise selection.
- Implemented `_pick_roadmap_rank_fallback` as a fallback for selecting the best exercise when strict matching fails, ensuring better exercise retrieval.
- Updated tests to validate the new learning-goal retrieval and fallback logic, ensuring robustness in the exercise selection process.
2026-06-11 12:54:07 +02:00 · 2026-06-11 12:54:07 +02:00 · 6d130a7e09
commit 6d130a7e09
parent b2fbf6b4af
5 changed files with 227 additions and 5 deletions
--- a/backend/planning_exercise_path_builder.py
+++ b/backend/planning_exercise_path_builder.py
@ -347,6 +347,87 @@ def _graph_visibility_context(
    )
 def _safe_tsquery_fragment(text: str) -> str:
    import re
    cleaned = re.sub(r"[^\w\säöüßÄÖÜ]", " ", text or "", flags=re.UNICODE)
    words = [w for w in cleaned.split() if len(w) >= 2][:10]
    return " ".join(words) if words else (text or "")[:60].strip()
 def _fetch_learning_goal_library_candidate_ids(
    cur,
    *,
    tenant: TenantContext,
    progression_graph_id: Optional[int],
    learning_goal: str,
    limit: int = 24,
 ) -> List[int]:
    """Return IDs of visible exercises whose title or full text matches a stage learning goal.

    An exercise qualifies when it is not archived, satisfies the visibility
    predicate from ``_planning_visibility_sql`` and either
    (a) its title equals the learning goal (case-insensitive, trimmed),
    (b) its title contains the learning goal as a literal substring, or
    (c) its ``search_vector`` matches the sanitized goal via
    ``plainto_tsquery('german', ...)``.
    Exact title matches sort first, then full-text rank, then ascending ID.

    Args:
        cur: Database cursor; rows must support ``row.get("id")``.
        tenant: Tenant context forwarded to the visibility helper.
        progression_graph_id: Optional graph ID forwarded to the visibility helper.
        learning_goal: Learning goal text; goals shorter than 3 characters
            (after stripping) yield an empty result without touching the DB.
        limit: Maximum number of IDs to return.

    Returns:
        Positive exercise IDs in ranking order (possibly empty).
    """
    lg = (learning_goal or "").strip()
    if len(lg) < 3:
        return []
    vis_sql, vis_params = _planning_visibility_sql(cur, tenant, progression_graph_id)
    tsq = _safe_tsquery_fragment(lg)
    # Escape LIKE metacharacters so "%" / "_" in the learning goal are matched
    # literally instead of acting as wildcards. Backslash is PostgreSQL's
    # default LIKE escape character and must itself be escaped first.
    like_body = (
        lg[:100]
        .lower()
        .replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )
    like_pat = f"%{like_body}%"
    try:
        cur.execute(
            f"""
            SELECT e.id
            FROM exercises e
            WHERE ({vis_sql})
              AND COALESCE(e.status, '') <> %s
              AND (
                lower(trim(e.title)) = lower(trim(%s))
                OR lower(e.title) LIKE %s
                OR (%s <> '' AND e.search_vector @@ plainto_tsquery('german', %s))
              )
            ORDER BY
              CASE WHEN lower(trim(e.title)) = lower(trim(%s)) THEN 0 ELSE 1 END,
              CASE WHEN %s <> '' THEN ts_rank_cd(e.search_vector, plainto_tsquery('german', %s)) ELSE 0 END DESC,
              e.id ASC
            LIMIT %s
            """,
            [
                *vis_params,
                "archived",
                lg,
                like_pat,
                tsq,
                tsq,
                lg,
                tsq,
                tsq,
                int(limit),
            ],
        )
    except Exception:
        # Best-effort fallback without the full-text clause (e.g. when
        # ``search_vector`` is unavailable).
        # NOTE(review): on PostgreSQL a failed statement aborts the open
        # transaction, so this retry only succeeds if the connection is in
        # autocommit mode or the caller wraps this call in a savepoint —
        # confirm against the connection setup.
        cur.execute(
            f"""
            SELECT e.id
            FROM exercises e
            WHERE ({vis_sql})
              AND COALESCE(e.status, '') <> %s
              AND (
                lower(trim(e.title)) = lower(trim(%s))
                OR lower(e.title) LIKE %s
              )
            ORDER BY CASE WHEN lower(trim(e.title)) = lower(trim(%s)) THEN 0 ELSE 1 END, e.id ASC
            LIMIT %s
            """,
            [*vis_params, "archived", lg, like_pat, lg, int(limit)],
        )
    out: List[int] = []
    for row in cur.fetchall() or []:
        try:
            eid = int(row.get("id") or 0)
        except (TypeError, ValueError):
            # Skip rows whose ID cannot be coerced to an integer.
            continue
        if eid > 0:
            out.append(eid)
    return out
 def _load_supplemental_exercise_rows(
    cur,
    *,
@ -1000,10 +1081,31 @@ def _match_roadmap_slot(
    step_kind = resolve_step_exercise_kind_filter(stage_spec, body.exercise_kind_any)
    supplemental_ids = _supplemental_exercise_ids_from_body(cur, body)
    lg_candidates = _fetch_learning_goal_library_candidate_ids(
        cur,
        tenant=tenant,
        progression_graph_id=body.progression_graph_id,
        learning_goal=stage_goal,
    )
    supplemental_ids = list(
        dict.fromkeys(
            int(x)
            for x in [
                *supplemental_ids,
                *lg_candidates,
                slot_priority_exercise_id,
            ]
            if x is not None and int(x) > 0
        )
    )
    priority_ids = list(
        dict.fromkeys(
-            x
+            int(x)
-            for x in [slot_priority_exercise_id, *(body.retrieval_boost_exercise_ids or [])]
+            for x in [
                slot_priority_exercise_id,
                *(body.retrieval_boost_exercise_ids or []),
                *lg_candidates[:8],
            ]
            if x is not None and int(x) > 0
        )
    )
--- a/backend/planning_exercise_semantics.py
+++ b/backend/planning_exercise_semantics.py
@ -955,6 +955,7 @@ def enrich_brief_with_path_constraints(
 # Minimum-score thresholds for stage matching.
 # NOTE(review): presumably on the same scale as the semantic scores attached
 # to retrieval hits; their consumers are not visible here — confirm.
 _MIN_STAGE_FIT_SEMANTIC = 0.30
 _MIN_STAGE_FIT_RELAXED = 0.20
 _MIN_TITLE_EQUIV_SEMANTIC = 0.15
 # Floor that _pick_roadmap_rank_fallback applies to the best hit's semantic rank.
 _MIN_ROADMAP_FALLBACK_RANK = 0.15
 def build_stage_match_brief(
@ -1260,6 +1261,76 @@ def exercise_passes_path_semantic_gate(
    return False
 def _pick_roadmap_rank_fallback(
    hits: List[Dict[str, Any]],
    used_exercise_ids: Set[int],
    *,
    stage_learning_goal: str,
    stage_anti_patterns: Optional[Sequence[str]] = None,
    path_primary_topic: Optional[str] = None,
    path_technique_excludes: Optional[Sequence[str]] = None,
 ) -> Optional[Dict[str, Any]]:
    """
    Roadmap-Notfall: bester Treffer nach Stufen-Ranking, wenn striktes Gate leer läuft.
    Filtert weiterhin Ausschlüsse und Technik-Scope (Kumite etc.), aber ohne
    Mindest-Semantik-Schwelle — so finden auch wortnahe Bibliotheks-Übungen den Slot.
    """
    stage_goal = (stage_learning_goal or "").strip()
    if not stage_goal or not hits:
        return None
    best: Optional[Dict[str, Any]] = None
    best_key: Tuple[float, float] = (-1.0, -1.0)
    for hit in hits:
        try:
            eid = int(hit["id"])
        except (TypeError, ValueError, KeyError):
            continue
        if eid in used_exercise_ids:
            continue
        title = str(hit.get("title") or "")
        summary = str(hit.get("summary") or "")
        goal_text = str(hit.get("goal") or hit.get("exercise_goal") or "")
        blob = _blob_from_fields(title, summary, goal_text, [])
        constraints = parse_stage_goal_constraints(stage_goal, stage_anti_patterns)
        if constraints.exclude_phrases and _blob_matches_stage_excludes(
            blob, constraints.exclude_phrases
        ):
            continue
        title_equiv = exercise_title_equivalent_to_stage_goal(title, stage_goal)
        primary = (path_primary_topic or "").strip()
        if primary and not title_equiv:
            tech_excludes = list(path_technique_excludes or [])
            for item in technique_sibling_excludes(primary):
                if item not in tech_excludes:
                    tech_excludes.append(item)
            if not exercise_passes_technique_path_scope(
                primary_topic=primary,
                title=title,
                summary=summary,
                goal=goal_text,
                learning_goal=stage_goal,
                sibling_excludes=tech_excludes,
                relaxed=True,
            ):
                continue
        rank_sem = float(
            hit.get("stage_rank_semantic")
            or hit.get("stage_semantic_score")
            or hit.get("semantic_score")
            or 0.0
        )
        score = float(hit.get("score") or 0.0)
        key = (rank_sem, score)
        if key > best_key:
            best_key = key
            best = hit
    if best is None or best_key[0] < _MIN_ROADMAP_FALLBACK_RANK:
        return None
    return best
 def pick_best_path_hit(
    hits: List[Dict[str, Any]],
    used_exercise_ids: Set[int],
@ -1341,7 +1412,16 @@ def pick_best_path_hit(
    if roadmap_stage_match:
        chosen = _scan(strict=False)
-        return chosen
+        if chosen:
            return chosen
        return _pick_roadmap_rank_fallback(
            hits,
            used_exercise_ids,
            stage_learning_goal=stage_goal,
            stage_anti_patterns=stage_anti_patterns,
            path_primary_topic=path_primary_topic,
            path_technique_excludes=path_technique_excludes,
        )
    chosen = _scan(strict=False)
    if chosen:
--- a/backend/planning_progression_roadmap.py
+++ b/backend/planning_progression_roadmap.py
@ -856,7 +856,7 @@ def build_roadmap_unfilled_gap_specs(
                "roadmap_major_step_index": stage_spec.major_step_index,
            }
        )
-    return specs[:5]
+    return specs[:12]
 def build_stage_specs(
--- a/backend/tests/test_planning_roadmap_stage_match.py
+++ b/backend/tests/test_planning_roadmap_stage_match.py
@ -367,6 +367,39 @@ def test_stage_fit_passes_for_title_equivalent_with_sufficient_semantic_score():
    )
 def test_roadmap_rank_fallback_picks_best_stage_semantic():
    """The fallback prefers the higher stage semantic rank over a higher raw score."""
    from planning_exercise_semantics import _pick_roadmap_rank_fallback

    topic = "mawashi geri"
    goal = "Hüftmobilität für Mawashi Geri"

    # Title matches the goal and has the higher raw score, but the lower stage rank.
    title_match = dict(
        id=1,
        title="Hüftmobilität für Mawashi Geri",
        summary="Aufwärmen",
        goal="",
        score=0.9,
        stage_rank_semantic=0.32,
    )
    # Lower raw score, higher stage rank: this one is expected to win.
    semantic_match = dict(
        id=2,
        title="Mawashi Hüftdehnung",
        summary="Adduktoren und Hüfte",
        goal="Mobilität für Mawashi Geri",
        score=0.7,
        stage_rank_semantic=0.58,
    )

    picked = _pick_roadmap_rank_fallback(
        [title_match, semantic_match],
        set(),
        stage_learning_goal=goal,
        path_primary_topic=topic,
        path_technique_excludes=technique_sibling_excludes(topic),
    )

    assert picked is not None
    assert int(picked["id"]) == 2
 def test_pick_best_prefers_semantic_fit_over_coincidental_title():
    stage_goal = "Hüftmobilität für Mawashi Geri"
    stage_brief = build_stage_match_brief(learning_goal=stage_goal)
--- a/frontend/src/utils/progressionGraphDraft.js
+++ b/frontend/src/utils/progressionGraphDraft.js
@ -727,7 +727,7 @@ export function slotsToSlotAssignments(draft) {
    }))
 }
-/** Alle Graph-Übungs-IDs für Retriever-Boost (Slots + Geschwister). */
+/** Alle Graph-Übungs-IDs für Retriever-Boost (Slots + Geschwister + gespeichertes Artefakt). */
 export function draftRetrievalBoostExerciseIds(draft) {
  const ids = new Set()
  for (const slot of draft.slots || []) {
@ -737,6 +737,13 @@ export function draftRetrievalBoostExerciseIds(draft) {
      if (sib.kind === 'library' && sib.exerciseId != null) ids.add(sib.exerciseId)
    }
  }
  const saved = draft?.slot_contents || draft?.planningArtifact?.slot_contents
  if (Array.isArray(saved)) {
    for (const raw of saved) {
      const eid = raw?.primary?.exercise_id ?? raw?.exercise_id
      if (eid != null && Number.isFinite(Number(eid))) ids.add(Number(eid))
    }
  }
  return [...ids]
 }