From d448c3191f30fadd925c3814c7c26b64a25079bd Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Fri, 12 Jun 2026 07:57:19 +0200
Subject: [PATCH] Enhance Stage Mismatch Handling and Roadmap Slot Purging

- Introduced `_purge_stage_mismatch_roadmap_slots` to clear slots with persistent stage mismatches, improving the relevance of exercise suggestions.
- Updated `collect_gap_fill_specs` to treat `stage_mismatch` issues separately from off-topic steps: such specs get the source `stage_mismatch` and a rationale and title hint derived from the stage learning goal.
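- Modified `_filter_learning_goal_candidate_ids` to pass `relaxed=False` to its per-candidate check, and removed the `_pick_roadmap_rank_fallback` call in `pick_best_path_hit` (that branch now returns `None`), so only relevant candidates are considered.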
- Enhanced `rematch_roadmap_slots` to accept a slot assignment history, so a slot is not refilled with an exercise that was already assigned to it earlier.
- Bumped `APP_VERSION` to 0.8.230 and `planning_exercise_suggest` to 0.23.5 to reflect the new features and improvements.
---
 backend/planning_exercise_path_ai_fill.py     |  22 +++-
 backend/planning_exercise_path_builder.py     | 111 +++++++++++++++++-
 backend/planning_exercise_semantics.py        |  82 +++++++++----
 backend/planning_path_rematch.py              |  13 ++
 .../test_planning_roadmap_stage_match.py      |   4 +-
 backend/version.py                            |   4 +-
 6 files changed, 201 insertions(+), 35 deletions(-)

diff --git a/backend/planning_exercise_path_ai_fill.py b/backend/planning_exercise_path_ai_fill.py
index 81373bf..a679584 100644
--- a/backend/planning_exercise_path_ai_fill.py
+++ b/backend/planning_exercise_path_ai_fill.py
@@ -425,9 +425,22 @@ def collect_gap_fill_specs(
         step_a, step_b = _step_neighbors_at_index(steps, idx)
         phase = ot.get("expected_phase") or "vertiefung"
         insert_after = max(idx - 1, -1)
+        stage_goal = str(ot.get("roadmap_learning_goal") or "").strip()
+        if str(ot.get("issue") or "") == "stage_mismatch" and stage_goal:
+            title_hint = stage_goal[:120]
+            rationale = (
+                f"Keine passende Bibliotheks-Übung für Stufen-Lernziel „{stage_goal[:100]}“."
+            )
+            sketch_rationale = (
+                f"Slot braucht Übung passend zu: {stage_goal[:200]}"
+            )
+        else:
+            title_hint = f"{topic} — {phase} (Ersatz für themenfremden Schritt)"
+            rationale = f"Schritt „{ot.get('title')}“ passt nicht zum Pfad-Thema."
+            sketch_rationale = f"Ersetzt themenfremden Schritt „{ot.get('title')}“."
         add(
             {
-                "source": "off_topic",
+                "source": "off_topic" if ot.get("issue") != "stage_mismatch" else "stage_mismatch",
                 "insert_after_index": insert_after,
                 "replace_step_index": idx,
                 "roadmap_major_step_index": major_idx,
@@ -435,18 +448,19 @@ def collect_gap_fill_specs(
                     "expected_phase": phase,
                     "off_topic_title": ot.get("title"),
                     "off_topic_exercise_id": ot.get("exercise_id"),
+                    "roadmap_learning_goal": stage_goal or None,
                 },
                 "phase": phase,
-                "title_hint": f"{topic} — {phase} (Ersatz für themenfremden Schritt)",
+                "title_hint": title_hint,
                 "sketch": _default_sketch(
                     goal_query=goal_query,
                     brief=brief,
                     step_a=step_a,
                     step_b=step_b,
                     phase=str(phase),
-                    rationale=f"Ersetzt themenfremden Schritt „{ot.get('title')}“.",
+                    rationale=sketch_rationale,
                 ),
-                "rationale": f"Schritt „{ot.get('title')}“ passt nicht zum Pfad-Thema.",
+                "rationale": rationale,
             }
         )
 
diff --git a/backend/planning_exercise_path_builder.py b/backend/planning_exercise_path_builder.py
index 79c0610..384475a 100644
--- a/backend/planning_exercise_path_builder.py
+++ b/backend/planning_exercise_path_builder.py
@@ -269,7 +269,7 @@ def _filter_learning_goal_candidate_ids(
             anti_patterns=stage_anti,
             path_primary_topic=path_primary or None,
             path_technique_excludes=path_tech_excludes,
-            relaxed=True,
+            relaxed=False,
         ):
             out.append(eid)
     return out
@@ -1322,6 +1322,78 @@ def _normalize_roadmap_steps_coverage(
     return out
 
 
+def _purge_stage_mismatch_roadmap_slots(
+    cur,
+    *,
+    steps: List[Dict[str, Any]],
+    roadmap_ctx: ProgressionRoadmapContext,
+    goal_query: str,
+    semantic_brief: PlanningSemanticBrief,
+) -> Tuple[List[Dict[str, Any]], List[Tuple[int, StageSpecArtifact]]]:
+    """Empty roadmap slots that still report a ``stage_mismatch`` issue.
+
+    A purged slot becomes an ``unfilled`` placeholder (AI gap instead of a bad
+    library exercise). Returns ``(steps, new_unfilled)``: step copies with the
+    purged slots replaced, plus one ``(stage_spec_position, stage_spec)`` per
+    purged slot; the input ``steps`` is returned as is when nothing is purged.
+    """
+
+    def _as_major(value: Any):
+        # Indexes may be missing or malformed; treat both as "no slot", never raise.
+        try:
+            return int(value)
+        except (TypeError, ValueError):
+            return None
+
+    issues = detect_off_topic_steps(cur, steps, brief=semantic_brief, goal_query=goal_query)
+    purge_majors: Set[int] = set()
+    for item in issues:
+        if str(item.get("issue") or "") != "stage_mismatch":
+            continue
+        flagged = _as_major(item.get("roadmap_major_step_index"))
+        if flagged is not None:
+            purge_majors.add(flagged)
+    if not purge_majors:
+        return steps, []
+
+    stage_specs = list(roadmap_ctx.stage_specs or [])
+    spec_by_major = {int(s.major_step_index): s for s in stage_specs}
+    first_pos: Dict[int, int] = {}
+    for pos, sp in enumerate(stage_specs):
+        first_pos.setdefault(int(sp.major_step_index), pos)
+    roadmap = roadmap_ctx.roadmap
+    major_by_index = {m.index: m for m in roadmap.major_steps} if roadmap else {}
+
+    new_unfilled: List[Tuple[int, StageSpecArtifact]] = []
+    out: List[Dict[str, Any]] = []
+    for raw in steps:
+        step = dict(raw)
+        major_idx = _as_major(step.get("roadmap_major_step_index"))
+        spec = spec_by_major.get(major_idx) if major_idx in purge_majors else None
+        if spec is None:
+            # Not flagged, or no stage spec known for the slot: keep the step copy.
+            out.append(step)
+            continue
+        major = major_by_index.get(major_idx)
+        goal = (spec.learning_goal or step.get("roadmap_learning_goal") or "").strip()
+        out.append(
+            {
+                "exercise_id": None,
+                "variant_id": None,
+                "title": goal or f"Slot {major_idx + 1}",
+                "is_ai_proposal": False,
+                "roadmap_major_step_index": major_idx,
+                "roadmap_phase": major.phase if major else step.get("roadmap_phase"),
+                "roadmap_learning_goal": goal or None,
+                "roadmap_match_source": "unfilled",
+                "slot_status": "unfilled",
+                "reasons": ["Keine passende Bibliotheks-Übung für Stufen-Lernziel"],
+            }
+        )
+        new_unfilled.append((first_pos[major_idx], spec))
+    return out, new_unfilled
+
+
 def _merge_rematch_unfilled(
     roadmap_unfilled: List[Tuple[int, StageSpecArtifact]],
     rematch_new_unfilled: List[Tuple[int, StageSpecArtifact]],
@@ -1401,6 +1473,16 @@ def _run_roadmap_rematch_loop(
 
     _track_rejected(off_topic_before_strip)
     _track_rejected(current_stripped)
+    slot_assignment_history: Dict[int, Set[int]] = {}
+    for raw in steps:
+        midx = raw.get("roadmap_major_step_index")
+        eid = raw.get("exercise_id")
+        if midx is None or eid is None:
+            continue
+        try:
+            slot_assignment_history.setdefault(int(midx), set()).add(int(eid))
+        except (TypeError, ValueError):
+            continue
 
     for round_idx in range(max_rounds):
         mini_qa = run_multistage_path_qa(
@@ -1462,6 +1544,7 @@ def _run_roadmap_rematch_loop(
             rematch_reasons=rematch_reasons,
             match_slot_fn=_match_roadmap_slot,
             rejected_by_major=rejected_by_major,
+            slot_assignment_history=slot_assignment_history,
         )
         rematch_rounds += 1
         for entry in round_log:
@@ -1475,6 +1558,16 @@ def _run_roadmap_rematch_loop(
                     rejected_by_major.setdefault(int(midx), set()).add(int(rid))
                 except (TypeError, ValueError):
                     pass
+            new_eid = entry.get("new_exercise_id")
+            if (
+                str(entry.get("action") or "") == "replaced"
+                and new_eid is not None
+                and midx is not None
+            ):
+                try:
+                    slot_assignment_history.setdefault(int(midx), set()).add(int(new_eid))
+                except (TypeError, ValueError):
+                    pass
 
         current_stripped = prune_stripped_after_rematch(current_stripped, round_log)
         roadmap_unfilled = _merge_rematch_unfilled(roadmap_unfilled, rematch_new_unfilled)
@@ -1500,6 +1593,22 @@ def _run_roadmap_rematch_loop(
             goal_query=goal_query,
         )
 
+    steps, purged_unfilled = _purge_stage_mismatch_roadmap_slots(
+        cur,
+        steps=steps,
+        roadmap_ctx=roadmap_ctx,
+        goal_query=goal_query,
+        semantic_brief=semantic_brief,
+    )
+    if purged_unfilled:
+        roadmap_unfilled = _merge_rematch_unfilled(roadmap_unfilled, purged_unfilled)
+        off_topic_steps = detect_off_topic_steps(
+            cur,
+            steps,
+            brief=semantic_brief,
+            goal_query=goal_query,
+        )
+
     return (
         steps,
         rematch_log,
diff --git a/backend/planning_exercise_semantics.py b/backend/planning_exercise_semantics.py
index 7fe9521..c0433f7 100644
--- a/backend/planning_exercise_semantics.py
+++ b/backend/planning_exercise_semantics.py
@@ -865,6 +865,11 @@ def stage_focus_phrases_from_learning_goal(learning_goal: str) -> List[str]:
     tokens = _significant_stage_tokens(lg, strip_negated=True)
     phrases: List[str] = []
     norm_lg = _normalize_phrase(lg)
+    tech_hit = _find_technique_in_text(norm_lg)
+    if tech_hit:
+        primary = tech_hit[0]
+        if primary not in phrases:
+            phrases.append(primary)
     if len(norm_lg) >= 8:
         phrases.append(norm_lg[:120])
     for i in range(len(tokens) - 1):
@@ -879,14 +884,22 @@ def stage_focus_phrases_from_learning_goal(learning_goal: str) -> List[str]:
 
 def stage_refinement_criteria_from_learning_goal(learning_goal: str) -> List[str]:
     """Erfolgskriterien für Phase C — nur aussagekräftige Mehrwort-Phrasen."""
+    lg = (learning_goal or "").strip()
+    if len(lg) < 3:
+        return []
+    norm_lg = _normalize_phrase(lg)
     out: List[str] = []
-    for phrase in stage_focus_phrases_from_learning_goal(learning_goal):
-        p = str(phrase or "").strip()
-        if not p:
+    if len(norm_lg) >= 15:
+        out.append(norm_lg[:120])
+    tokens = _significant_stage_tokens(lg, strip_negated=True)
+    for i in range(len(tokens) - 1):
+        a, b = tokens[i], tokens[i + 1]
+        if len(a) < 5 or len(b) < 5:
             continue
-        if " " in p or len(p) >= 12:
-            out.append(p[:120])
-    return out[:4]
+        pair = f"{a} {b}"
+        if len(pair) >= 12 and pair not in out:
+            out.append(pair)
+    return out[:3]
 
 
 def exercise_title_matches_peer_stage_goal(
@@ -1095,6 +1108,9 @@ def build_stage_match_brief(
     constraints = parse_stage_goal_constraints(lg)
     must: List[str] = []
     norm_lg = _normalize_phrase(lg)
+    tech_hit = _find_technique_in_text(norm_lg)
+    if tech_hit and tech_hit[0] not in must:
+        must.insert(0, tech_hit[0])
     if primary_path and primary_path not in must:
         must.insert(0, primary_path[:120])
     for token in constraints.positive_tokens:
@@ -1165,12 +1181,15 @@ def score_exercise_stage_fit(
         if part.lower().startswith("lernziel:"):
             lg_hint = part.split(":", 1)[-1].strip()
             break
+    if not lg_hint:
+        lg_hint = (stage_brief.retrieval_query or "").split("|")[0].strip()
     if not lg_hint:
         for mp in stage_brief.must_phrases or []:
             if mp and len(_normalize_phrase(mp)) >= 8:
                 lg_hint = mp
                 break
     focus_phrases = stage_focus_phrases_from_learning_goal(lg_hint) if lg_hint else []
+    tech_hit = _find_technique_in_text(_normalize_phrase(lg_hint)) if lg_hint else None
     if not focus_phrases:
         focus_phrases = [
             t
@@ -1185,6 +1204,16 @@ def score_exercise_stage_fit(
             score = min(1.0, score + bonus)
             if hits >= max(1, len(focus_phrases) // 2):
                 reasons = ["Stufen-Schwerpunkte im Übungstext", *reasons]
+        non_tech = [
+            p
+            for p in focus_phrases
+            if not tech_hit or _normalize_phrase(p) != tech_hit[0]
+        ]
+        specific_hits = sum(1 for p in non_tech if _phrase_in_blob(p, blob))
+        if tech_hit and _phrase_in_blob(tech_hit[0], blob) and specific_hits == 0:
+            score = min(score, 0.16)
+            if "Nur Technik-Bezug" not in reasons:
+                reasons = ["Nur Technik-Bezug, Stufen-Schwerpunkte fehlen", *reasons]
     learning_goal_for_equiv = lg_hint or (stage_brief.must_phrases[0] if stage_brief.must_phrases else "")
     if learning_goal_for_equiv and exercise_title_equivalent_to_stage_goal(title, learning_goal_for_equiv):
         score = max(score, 0.42)
@@ -1246,15 +1275,13 @@ def exercise_passes_stage_fit(
         learning_goal=lg,
         anti_patterns=anti_patterns,
     )
-    stage_sem = stage_semantic_score
-    if stage_sem is None:
-        stage_sem, _ = score_exercise_stage_fit(
-            title=title,
-            summary=summary,
-            goal=goal,
-            stage_brief=brief,
-            step_phase=step_phase,
-        )
+    stage_sem, _ = score_exercise_stage_fit(
+        title=title,
+        summary=summary,
+        goal=goal,
+        stage_brief=brief,
+        step_phase=step_phase,
+    )
 
     if relaxed:
         threshold = _MIN_STAGE_FIT_RELAXED
@@ -1262,7 +1289,19 @@ def exercise_passes_stage_fit(
         threshold = _MIN_TITLE_EQUIV_SEMANTIC
     else:
         threshold = min_stage_semantic
-    return float(stage_sem or 0.0) >= threshold
+
+    if float(stage_sem or 0.0) >= threshold:
+        return True
+
+    if relaxed and not title_equiv:
+        focus = stage_focus_phrases_from_learning_goal(lg)
+        tech = _find_technique_in_text(_normalize_phrase(lg))
+        non_tech = [p for p in focus if not tech or _normalize_phrase(p) != tech[0]]
+        specific_hits = sum(1 for p in non_tech if _phrase_in_blob(p, blob))
+        if specific_hits >= 2 and float(stage_sem or 0.0) >= 0.14:
+            return True
+
+    return False
 
 
 def apply_stage_match_retrieval_weights(brief: PlanningSemanticBrief) -> Dict[str, float]:
@@ -1539,16 +1578,7 @@ def pick_best_path_hit(
         chosen = _scan(strict=False)
         if chosen:
             return chosen
-        return _pick_roadmap_rank_fallback(
-            hits,
-            used_exercise_ids,
-            stage_learning_goal=stage_goal,
-            stage_anti_patterns=stage_anti_patterns,
-            path_primary_topic=path_primary_topic,
-            path_technique_excludes=path_technique_excludes,
-            stage_match_brief=stage_brief,
-            peer_learning_goals=peer_learning_goals,
-        )
+        return None
 
     chosen = _scan(strict=False)
     if chosen:
diff --git a/backend/planning_path_rematch.py b/backend/planning_path_rematch.py
index 9adcb5c..dc042ee 100644
--- a/backend/planning_path_rematch.py
+++ b/backend/planning_path_rematch.py
@@ -116,6 +116,7 @@ def rematch_roadmap_slots(
     rematch_reasons: Mapping[int, str],
     match_slot_fn,
     rejected_by_major: Optional[Mapping[int, Set[int]]] = None,
+    slot_assignment_history: Optional[Mapping[int, Set[int]]] = None,
 ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Tuple[int, StageSpecArtifact]]]:
     """
     Ersetzt nur betroffene Slots; andere Schritte und used-Set bleiben konsistent.
@@ -180,6 +181,18 @@ def rematch_roadmap_slots(
         )
 
         reason = str(rematch_reasons.get(int(major_idx)) or "rematch_slot")
+        if new_step:
+            try:
+                new_eid = int(new_step.get("exercise_id") or 0)
+            except (TypeError, ValueError):
+                new_eid = 0
+            hist = (
+                slot_assignment_history.get(int(major_idx), set())
+                if slot_assignment_history
+                else set()
+            )
+            if new_eid > 0 and new_eid in hist:
+                new_step = None
         if new_step:
             steps_by_major[int(major_idx)] = new_step
             rematch_log.append(
diff --git a/backend/tests/test_planning_roadmap_stage_match.py b/backend/tests/test_planning_roadmap_stage_match.py
index 21ae1c2..442505d 100644
--- a/backend/tests/test_planning_roadmap_stage_match.py
+++ b/backend/tests/test_planning_roadmap_stage_match.py
@@ -270,8 +270,8 @@ def test_pick_roadmap_relaxed_for_non_technique_stage():
         {
             "id": 11,
             "title": "Adduktoren Dehnung am Boden",
-            "summary": "Flexibilität Hüfte",
-            "goal": "Mobilität",
+            "summary": "Flexibilität Hüfte, Adduktoren dehnen",
+            "goal": "Mobilität — Adduktoren dehnen",
             "score": 0.68,
             "semantic_score": 0.22,
             "stage_semantic_score": 0.22,
diff --git a/backend/version.py b/backend/version.py
index aea1e0c..17b1603 100644
--- a/backend/version.py
+++ b/backend/version.py
@@ -1,6 +1,6 @@
 # Shinkan Jinkendo Version Information
 
-APP_VERSION = "0.8.229"
+APP_VERSION = "0.8.230"
 BUILD_DATE = "2026-05-22"
 DB_SCHEMA_VERSION = "20260607090"
 
@@ -38,7 +38,7 @@ MODULE_VERSIONS = {
     "skill_profiles": "1.0.0",  # Phase 3: gewichtetes Fähigkeiten-Profil + skill-discovery/suggestions
     "methods": "0.1.0",
     "exercises": "2.37.1",  # KI-Endpoints: feature_usage nach ai_calls consume
-    "planning_exercise_suggest": "0.23.4",  # Stufen-Match: Fallback mit Gate, Peer-Slot-Schutz, LG-Kandidaten-Filter
+    "planning_exercise_suggest": "0.23.5",  # Roadmap-Match strikt; stage_mismatch → unfilled + KI-Gap
     "training_units": "0.4.0",  # POST .../publish-to-framework: Ablauf aus geplanter Einheit → Rahmen-Slot-Blueprint
     "training_programs": "0.1.0",
     "planning": "0.15.0",  # Vorlagen: Strukturvorschau, Bearbeiten inkl. Split-Sessions + Beschreibung