From 713a344d170f309bfd8083f9843a9cf908e76da2 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Thu, 11 Jun 2026 10:40:25 +0200
Subject: [PATCH] Enhance Roadmap Step Handling and Off-Topic Logic

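- Improved off-topic step handling: gap-fill specs now locate the affected step via `roadmap_major_step_index` (falling back to `step_index`), and off-topic detection also runs for roadmap stage-spec paths when the semantic strength is below 0.55.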
- Refactored `collect_gap_fill_specs` to streamline the insertion logic for off-topic steps, ensuring correct placement based on major step indices.
- Introduced `_normalize_roadmap_steps_coverage`, which returns one step per roadmap stage spec (major step) and fills slots that have no matched step with empty placeholder entries.
- Added `prune_stripped_after_rematch` function to clean up stripped off-topic steps after rematching, improving the overall rematching process.
- Updated the rematch tests to assert that the exercise being replaced is passed to the slot matcher in `used`, and added a test covering that case.
- Incremented application version to reflect these updates.
---
 backend/planning_exercise_path_ai_fill.py     |  26 ++-
 backend/planning_exercise_path_builder.py     | 203 ++++++++++++++----
 backend/planning_exercise_path_qa.py          |   9 +-
 backend/planning_path_rematch.py              |  30 +++
 backend/tests/test_planning_path_rematch.py   |  41 +++-
 .../ExerciseProgressionPathBuilder.jsx        |   5 +-
 frontend/src/utils/progressionGraphDraft.js   |   8 +
 7 files changed, 269 insertions(+), 53 deletions(-)

diff --git a/backend/planning_exercise_path_ai_fill.py b/backend/planning_exercise_path_ai_fill.py
index ef1f5bf..c33a5bc 100644
--- a/backend/planning_exercise_path_ai_fill.py
+++ b/backend/planning_exercise_path_ai_fill.py
@@ -363,15 +363,35 @@ def collect_gap_fill_specs(
         )
 
     for ot in off_topic_steps:
-        idx = int(ot.get("step_index") or 0)
-        if idx <= 0 or idx >= len(steps) - 1:
+        major_idx = ot.get("roadmap_major_step_index")
+        idx: Optional[int] = None
+        if major_idx is not None:
+            try:
+                mi = int(major_idx)
+            except (TypeError, ValueError):
+                mi = None
+            if mi is not None:
+                idx = next(
+                    (
+                        i
+                        for i, s in enumerate(steps)
+                        if s.get("roadmap_major_step_index") is not None
+                        and int(s["roadmap_major_step_index"]) == mi
+                    ),
+                    None,
+                )
+        if idx is None:
+            idx = int(ot.get("step_index") or 0)
+        if idx < 0 or idx >= len(steps):
             continue
         phase = ot.get("expected_phase") or "vertiefung"
+        insert_after = max(idx - 1, -1)
         add(
             {
                 "source": "off_topic",
-                "insert_after_index": idx - 1,
+                "insert_after_index": insert_after,
                 "replace_step_index": idx,
+                "roadmap_major_step_index": major_idx,
                 "gap": {
                     "expected_phase": phase,
                     "off_topic_title": ot.get("title"),
diff --git a/backend/planning_exercise_path_builder.py b/backend/planning_exercise_path_builder.py
index 7a9b4c7..7973571 100644
--- a/backend/planning_exercise_path_builder.py
+++ b/backend/planning_exercise_path_builder.py
@@ -14,7 +14,11 @@ from pydantic import BaseModel, Field
 from tenant_context import TenantContext, library_content_visibility_sql
 from planning_exercise_profiles import PlanningTargetProfile
 from planning_path_qa_pipeline import run_multistage_path_qa
-from planning_path_rematch import collect_rematch_slot_indices, rematch_roadmap_slots
+from planning_path_rematch import (
+    collect_rematch_slot_indices,
+    prune_stripped_after_rematch,
+    rematch_roadmap_slots,
+)
 from planning_stage_context import build_contextualized_stage_goal, resolve_path_start_target
 from planning_exercise_path_qa import (
     apply_llm_path_reorder,
@@ -704,6 +708,129 @@ def _match_roadmap_slot(
     return step, None
 
 
+def _normalize_roadmap_steps_coverage(
+    steps: List[Dict[str, Any]],
+    *,
+    roadmap_ctx: ProgressionRoadmapContext,
+    max_steps: int,
+) -> List[Dict[str, Any]]:
+    """Return one entry per roadmap stage spec, filling unmatched slots with empty placeholders."""
+    stage_specs = list(roadmap_ctx.stage_specs or [])[:max_steps]  # truncated before the sort below
+    if not stage_specs:
+        return steps  # no stage specs: the input list itself is returned, uncopied
+
+    major_by_index: Dict[int, MajorStep] = {}
+    if roadmap_ctx.roadmap:
+        major_by_index = {m.index: m for m in roadmap_ctx.roadmap.major_steps}
+
+    by_major: Dict[int, Dict[str, Any]] = {}
+    for raw in steps:
+        step = dict(raw)  # shallow copy so the returned entries do not alias the input dicts
+        midx = step.get("roadmap_major_step_index")
+        if midx is not None:  # NOTE(review): steps without a major index never reach the output — confirm intended
+            by_major[int(midx)] = step  # a later step with the same major index overwrites an earlier one
+
+    out: List[Dict[str, Any]] = []
+    for spec in sorted(stage_specs, key=lambda s: s.major_step_index):
+        midx = int(spec.major_step_index)
+        if midx in by_major:
+            out.append(by_major[midx])
+            continue
+        major = major_by_index.get(midx)  # None when the roadmap has no major step with this index
+        goal = (spec.learning_goal or "").strip()
+        out.append(
+            {
+                "exercise_id": None,
+                "variant_id": None,
+                "title": goal or f"Slot {midx + 1}",
+                "is_ai_proposal": False,
+                "roadmap_major_step_index": midx,
+                "roadmap_phase": major.phase if major else None,
+                "roadmap_learning_goal": goal or None,
+                "roadmap_match_source": "stage_spec",
+                "reasons": [],
+            }
+        )
+    return out  # steps whose major index matches no (truncated) stage spec are not included
+
+
+def _maybe_rematch_roadmap_after_strip(  # runs at most one rematch round over roadmap slots
+    cur,
+    *,
+    tenant: TenantContext,
+    body: ProgressionPathSuggestRequest,
+    goal_query: str,
+    max_steps: int,
+    semantic_brief: PlanningSemanticBrief,
+    path_target_profile: PlanningTargetProfile,
+    path_intent: str,
+    roadmap_ctx: ProgressionRoadmapContext,
+    steps: List[Dict[str, Any]],
+    stripped_off_topic: List[Dict[str, Any]],
+    off_topic_before_strip: List[Dict[str, Any]],
+    roadmap_unfilled: List[Tuple[int, StageSpecArtifact]],
+) -> Tuple[
+    List[Dict[str, Any]],  # steps (rematched, or the input list when no rematch ran)
+    List[Dict[str, Any]],  # rematch_log
+    List[Dict[str, Any]],  # stripped_off_topic (pruned after a rematch)
+    List[Dict[str, Any]],  # off_topic_steps re-detected after a rematch; [] when no rematch ran
+    int,  # rematch_rounds: 0 or 1
+    List[Tuple[int, StageSpecArtifact]],  # roadmap_unfilled
+]:
+    rematch_log: List[Dict[str, Any]] = []
+    rematch_rounds = 0
+    if not body.auto_rematch_after_qa or not roadmap_ctx.stage_specs:  # rematch disabled or nothing to match against
+        return steps, rematch_log, stripped_off_topic, [], rematch_rounds, roadmap_unfilled
+
+    slot_indices, rematch_reasons = collect_rematch_slot_indices(
+        stripped_off_topic=stripped_off_topic,
+        off_topic_steps=off_topic_before_strip if not stripped_off_topic else [],  # pre-strip findings only when nothing was stripped
+        optimization_hints=[],
+        stage_specs=roadmap_ctx.stage_specs,
+    )
+    if not slot_indices:  # no slot qualifies for a rematch
+        return steps, rematch_log, stripped_off_topic, [], rematch_rounds, roadmap_unfilled
+
+    steps, rematch_log, rematch_new_unfilled = rematch_roadmap_slots(
+        cur,
+        tenant=tenant,
+        body=body,
+        goal_query=goal_query,
+        max_steps=max_steps,
+        semantic_brief=semantic_brief,
+        path_target_profile=path_target_profile,
+        path_intent=path_intent,
+        roadmap_ctx=roadmap_ctx,
+        steps=steps,
+        slot_indices=slot_indices,
+        rematch_reasons=rematch_reasons,
+        match_slot_fn=_match_roadmap_slot,
+    )
+    rematch_rounds = 1  # from here on a rematch has run; callers can tell via this value
+    stripped_off_topic = prune_stripped_after_rematch(stripped_off_topic, rematch_log)
+    if rematch_new_unfilled:
+        remapped = {sp.major_step_index for _, sp in rematch_new_unfilled}
+        roadmap_unfilled = [  # drop earlier unfilled entries for slots the rematch reports again
+            item for item in roadmap_unfilled if item[1].major_step_index not in remapped
+        ]
+        roadmap_unfilled.extend(rematch_new_unfilled)
+
+    off_topic_steps = detect_off_topic_steps(  # re-detect on the rematched path; the result may be empty
+        cur,
+        steps,
+        brief=semantic_brief,
+        goal_query=goal_query,
+    )
+    return (
+        steps,
+        rematch_log,
+        stripped_off_topic,
+        off_topic_steps,  # NOTE: an empty list here does not imply "no rematch"; check rematch_rounds
+        rematch_rounds,
+        roadmap_unfilled,
+    )
+
+
 def _build_steps_roadmap_first(
     cur,
     *,
@@ -906,7 +1033,6 @@ def _run_evaluate_only_path_qa(
             brief=semantic_brief,
             goal_query=goal_query,
         )
-        steps, stripped_off_topic = strip_off_topic_steps_from_path(steps, off_topic_steps)
         llm_gap_specs = parse_llm_suggested_new_exercises(
             llm_qa,
             brief=semantic_brief,
@@ -918,7 +1044,7 @@ def _run_evaluate_only_path_qa(
             gap_specs = collect_gap_fill_specs(
                 steps=steps,
                 unfilled_gaps=fresh_large_gaps or unfilled_gaps,
-                off_topic_steps=off_topic_steps if not stripped_off_topic else [],
+                off_topic_steps=off_topic_steps,
                 llm_specs=llm_gap_specs,
                 brief=semantic_brief,
                 goal_query=goal_query,
@@ -1374,49 +1500,31 @@ def suggest_progression_path(
                 roadmap_first=roadmap_first,
             )
 
-        if (
-            roadmap_first
-            and body.auto_rematch_after_qa
-            and roadmap_ctx is not None
-            and roadmap_ctx.stage_specs
-        ):
-            slot_indices, rematch_reasons = collect_rematch_slot_indices(
+        if roadmap_first and roadmap_ctx is not None:
+            (
+                steps,
+                rematch_log,
+                stripped_off_topic,
+                rematch_off_topic,
+                rematch_rounds,
+                roadmap_unfilled,
+            ) = _maybe_rematch_roadmap_after_strip(
+                cur,
+                tenant=tenant,
+                body=body,
+                goal_query=goal_query,
+                max_steps=max_steps,
+                semantic_brief=semantic_brief,
+                path_target_profile=path_target_profile,
+                path_intent=path_intent,
+                roadmap_ctx=roadmap_ctx,
+                steps=steps,
                 stripped_off_topic=stripped_off_topic,
-                off_topic_steps=off_topic_before_strip if not stripped_off_topic else [],
-                optimization_hints=[],
-                stage_specs=roadmap_ctx.stage_specs,
+                off_topic_before_strip=off_topic_before_strip,
+                roadmap_unfilled=roadmap_unfilled,
             )
-            if slot_indices:
-                steps, rematch_log, rematch_new_unfilled = rematch_roadmap_slots(
-                    cur,
-                    tenant=tenant,
-                    body=body,
-                    goal_query=goal_query,
-                    max_steps=max_steps,
-                    semantic_brief=semantic_brief,
-                    path_target_profile=path_target_profile,
-                    path_intent=path_intent,
-                    roadmap_ctx=roadmap_ctx,
-                    steps=steps,
-                    slot_indices=slot_indices,
-                    rematch_reasons=rematch_reasons,
-                    match_slot_fn=_match_roadmap_slot,
-                )
-                rematch_rounds = 1
-                if rematch_new_unfilled:
-                    remapped = {sp.major_step_index for _, sp in rematch_new_unfilled}
-                    roadmap_unfilled = [
-                        item
-                        for item in roadmap_unfilled
-                        if item[1].major_step_index not in remapped
-                    ]
-                    roadmap_unfilled.extend(rematch_new_unfilled)
-                off_topic_steps = detect_off_topic_steps(
-                    cur,
-                    steps,
-                    brief=semantic_brief,
-                    goal_query=goal_query,
-                )
+            if rematch_rounds:  # a rematch ran: refresh even when re-detection found nothing off-topic
+                off_topic_steps = rematch_off_topic
                 gaps = detect_path_gaps(
                     cur,
                     steps,
@@ -1500,6 +1608,13 @@ def suggest_progression_path(
         path_qa["rematch_log"] = rematch_log
         path_qa["rematch_rounds"] = rematch_rounds
 
+    if roadmap_first and roadmap_ctx is not None:
+        steps = _normalize_roadmap_steps_coverage(
+            steps,
+            roadmap_ctx=roadmap_ctx,
+            max_steps=max_steps,
+        )
+
     target_profile_summary = path_target_profile.to_summary_dict(cur)
     retrieval_parts = ["profile_v1", "full_library", "path_builder", "semantics"]
     if roadmap_first:
diff --git a/backend/planning_exercise_path_qa.py b/backend/planning_exercise_path_qa.py
index 8b833a4..82b5847 100644
--- a/backend/planning_exercise_path_qa.py
+++ b/backend/planning_exercise_path_qa.py
@@ -416,7 +416,14 @@ def detect_off_topic_steps(
     goal_query: Optional[str] = None,
 ) -> List[Dict[str, Any]]:
     """Schritte ohne Bezug zum Pfad-Thema (z. B. reine Kraftübungen bei Mae Geri)."""
-    if brief.semantic_strength < 0.55 or len(steps) < 2:
+    if len(steps) < 2:
+        return []
+    roadmap_stage_steps = any(
+        (step.get("roadmap_match_source") == "stage_spec")
+        or (step.get("roadmap_learning_goal") or "").strip()
+        for step in steps
+    )
+    if brief.semantic_strength < 0.55 and not roadmap_stage_steps:
         return []
 
     path_anti = resolve_path_anti_patterns(goal_query or "", semantic_brief=brief)
diff --git a/backend/planning_path_rematch.py b/backend/planning_path_rematch.py
index 1faeba8..d86f5b9 100644
--- a/backend/planning_path_rematch.py
+++ b/backend/planning_path_rematch.py
@@ -137,6 +137,8 @@ def rematch_roadmap_slots(
             for m, s in steps_by_major.items()
             if s.get("exercise_id") is not None
         }
+        if old and old.get("exercise_id") is not None:
+            used.add(int(old["exercise_id"]))
         planned_ids, anchor_id, anchor_variant_id = _context_before_major(
             steps_by_major, int(major_idx)
         )
@@ -196,7 +198,35 @@ def rematch_roadmap_slots(
     return ordered, rematch_log, new_unfilled
 
 
+def prune_stripped_after_rematch(
+    stripped_off_topic: Sequence[Mapping[str, Any]],
+    rematch_log: Sequence[Mapping[str, Any]],
+) -> List[Dict[str, Any]]:
+    """Drop stripped off-topic entries whose roadmap slot a rematch replaced.
+
+    Only log dicts with ``action == "replaced"`` and a major index count.
+    Non-dict items are skipped; kept entries are always shallow copies.
+    """
+    replaced: Set[int] = {
+        int(entry["roadmap_major_step_index"])
+        for entry in rematch_log or []
+        if isinstance(entry, dict)
+        and str(entry.get("action") or "") == "replaced"
+        and entry.get("roadmap_major_step_index") is not None
+    }
+    return [
+        dict(item)  # copy on every path so the result never aliases the input entries
+        for item in stripped_off_topic or []
+        if isinstance(item, dict)
+        and (
+            item.get("roadmap_major_step_index") is None  # entries without a slot index are kept
+            or int(item["roadmap_major_step_index"]) not in replaced
+        )
+    ]
+
+
 __all__ = [
     "collect_rematch_slot_indices",
+    "prune_stripped_after_rematch",
     "rematch_roadmap_slots",
 ]
diff --git a/backend/tests/test_planning_path_rematch.py b/backend/tests/test_planning_path_rematch.py
index fbacba9..46e4b02 100644
--- a/backend/tests/test_planning_path_rematch.py
+++ b/backend/tests/test_planning_path_rematch.py
@@ -95,8 +95,9 @@ def test_rematch_roadmap_slots_replaces_only_target_slot():
 
     def _fake_match(cur, *, stage_spec, used, **kwargs):
         assert stage_spec.major_step_index == 1
-        assert 20 not in used
+        assert 20 in used
         assert 10 in used
+        assert 30 in used
         return (
             {
                 "exercise_id": 21,
@@ -131,3 +132,41 @@ def test_rematch_roadmap_slots_replaces_only_target_slot():
     assert log[0]["replaced_exercise_id"] == 20
     assert log[0]["new_exercise_id"] == 21
     assert not unfilled
+
+
+def test_rematch_excludes_replaced_exercise_from_used():  # despite the name: asserts the replaced id IS in `used`
+    specs = _stage_specs()
+    ctx = ProgressionRoadmapContext(
+        goal_query="Mawashi Geri",
+        max_steps=3,
+        stage_specs=specs,
+    )
+    steps = [
+        {"exercise_id": 10, "title": "OK", "roadmap_major_step_index": 0},
+        {"exercise_id": 99, "title": "Mae Geri", "roadmap_major_step_index": 1},  # the slot being rematched
+    ]
+    seen_used = []  # snapshots of the `used` set handed to the matcher on each call
+
+    def _fake_match(cur, *, used, stage_spec, **kwargs):
+        seen_used.append(set(used))
+        return (
+            {"exercise_id": 42, "title": "Neu", "roadmap_major_step_index": stage_spec.major_step_index},
+            None,
+        )
+
+    rematch_roadmap_slots(
+        None,
+        tenant=None,
+        body=None,
+        goal_query="Mawashi",
+        max_steps=3,
+        semantic_brief=None,
+        path_target_profile=None,
+        path_intent="",
+        roadmap_ctx=ctx,
+        steps=steps,
+        slot_indices={1},
+        rematch_reasons={1: "technique_scope"},
+        match_slot_fn=_fake_match,
+    )
+    assert 99 in seen_used[0]  # presumably so the matcher cannot pick the replaced exercise again — confirm
diff --git a/frontend/src/components/ExerciseProgressionPathBuilder.jsx b/frontend/src/components/ExerciseProgressionPathBuilder.jsx
index 598a620..327a0a9 100644
--- a/frontend/src/components/ExerciseProgressionPathBuilder.jsx
+++ b/frontend/src/components/ExerciseProgressionPathBuilder.jsx
@@ -1022,10 +1022,7 @@ export default function ExerciseProgressionPathBuilder({
   const applyPathMatchResponse = (res, q) => {
     const qa = res?.path_qa || null
     const rawRows = (Array.isArray(res?.steps) ? res.steps : []).map(mapApiStepToRow)
-    const rows =
-      Array.isArray(qa?.stripped_off_topic_steps) && qa.stripped_off_topic_steps.length > 0
-        ? rawRows
-        : applyOffTopicFlags(rawRows, qa)
+    const rows = applyOffTopicFlags(rawRows, qa)
     if (rows.length < 2) {
       throw new Error('Zu wenig Schritte im Vorschlag.')
     }
diff --git a/frontend/src/utils/progressionGraphDraft.js b/frontend/src/utils/progressionGraphDraft.js
index 18ee771..119494b 100644
--- a/frontend/src/utils/progressionGraphDraft.js
+++ b/frontend/src/utils/progressionGraphDraft.js
@@ -750,12 +750,14 @@ export function applyMatchStepsToSlots(draft, apiSteps) {
     siblings: [...(slot.siblings || [])],
   }))
 
+  const touchedMajors = new Set()
   for (const step of steps) {
     if (step.roadmap_major_step_index == null || !Number.isFinite(Number(step.roadmap_major_step_index))) {
       continue
     }
     const idx = Number(step.roadmap_major_step_index)
     if (idx < 0 || idx >= nextSlots.length) continue
+    touchedMajors.add(idx)
 
     const isProposal = Boolean(step.is_ai_proposal) || step.exercise_id == null
     if (isProposal) {
@@ -773,6 +775,12 @@ export function applyMatchStepsToSlots(draft, apiSteps) {
     }
   }
 
+  for (let i = 0; i < nextSlots.length; i += 1) {
+    if (!touchedMajors.has(i)) {
+      nextSlots[i].primary = emptySlotExercise()
+    }
+  }
+
   return syncProgressionRoadmapFromSlots({ ...draft, slots: nextSlots, dirty: true })
 }