Enhance Stage Mismatch Handling and Roadmap Slot Purging

- Introduced `_purge_stage_mismatch_roadmap_slots` to clear slots with persistent stage mismatches, improving the relevance of exercise suggestions.
- Updated `collect_gap_fill_specs` to handle stage mismatch issues more effectively, providing clearer rationale and title hints for off-topic exercises.
- Modified `_filter_learning_goal_candidate_ids` to enforce stricter filtering criteria, ensuring only relevant candidates are considered.
- Enhanced `rematch_roadmap_slots` to incorporate slot assignment history, preventing conflicts with previously assigned exercises.
- Bumped version to 0.8.230 to reflect the new features and improvements.
2026-06-12 07:57:19 +02:00 · 2026-06-12 07:57:19 +02:00 · d448c3191f
commit d448c3191f
parent 8a4be795f4
6 changed files with 201 additions and 35 deletions
--- a/backend/planning_exercise_path_ai_fill.py
+++ b/backend/planning_exercise_path_ai_fill.py
@ -425,9 +425,22 @@ def collect_gap_fill_specs(
        step_a, step_b = _step_neighbors_at_index(steps, idx)
        phase = ot.get("expected_phase") or "vertiefung"
        insert_after = max(idx - 1, -1)
        stage_goal = str(ot.get("roadmap_learning_goal") or "").strip()
        if str(ot.get("issue") or "") == "stage_mismatch" and stage_goal:
            title_hint = stage_goal[:120]
            rationale = (
                f"Keine passende Bibliotheks-Übung für Stufen-Lernziel „{stage_goal[:100]}“."
            )
            sketch_rationale = (
                f"Slot braucht Übung passend zu: {stage_goal[:200]}"
            )
        else:
            title_hint = f"{topic} — {phase} (Ersatz für themenfremden Schritt)"
            rationale = f"Schritt „{ot.get('title')}“ passt nicht zum Pfad-Thema."
            sketch_rationale = f"Ersetzt themenfremden Schritt „{ot.get('title')}“."
        add(
            {
-                "source": "off_topic",
+                "source": "off_topic" if ot.get("issue") != "stage_mismatch" else "stage_mismatch",
                "insert_after_index": insert_after,
                "replace_step_index": idx,
                "roadmap_major_step_index": major_idx,
@ -435,18 +448,19 @@ def collect_gap_fill_specs(
                    "expected_phase": phase,
                    "off_topic_title": ot.get("title"),
                    "off_topic_exercise_id": ot.get("exercise_id"),
                    "roadmap_learning_goal": stage_goal or None,
                },
                "phase": phase,
-                "title_hint": f"{topic} — {phase} (Ersatz für themenfremden Schritt)",
+                "title_hint": title_hint,
                "sketch": _default_sketch(
                    goal_query=goal_query,
                    brief=brief,
                    step_a=step_a,
                    step_b=step_b,
                    phase=str(phase),
-                    rationale=f"Ersetzt themenfremden Schritt „{ot.get('title')}“.",
+                    rationale=sketch_rationale,
                ),
-                "rationale": f"Schritt „{ot.get('title')}“ passt nicht zum Pfad-Thema.",
+                "rationale": rationale,
            }
        )
--- a/backend/planning_exercise_path_builder.py
+++ b/backend/planning_exercise_path_builder.py
@ -269,7 +269,7 @@ def _filter_learning_goal_candidate_ids(
            anti_patterns=stage_anti,
            path_primary_topic=path_primary or None,
            path_technique_excludes=path_tech_excludes,
-            relaxed=True,
+            relaxed=False,
        ):
            out.append(eid)
    return out
@ -1322,6 +1322,78 @@ def _normalize_roadmap_steps_coverage(
    return out
 def _purge_stage_mismatch_roadmap_slots(
    cur,
    *,
    steps: List[Dict[str, Any]],
    roadmap_ctx: ProgressionRoadmapContext,
    goal_query: str,
    semantic_brief: PlanningSemanticBrief,
 ) -> Tuple[List[Dict[str, Any]], List[Tuple[int, StageSpecArtifact]]]:
    """Empty roadmap slots that are still flagged as ``stage_mismatch``.

    Runs ``detect_off_topic_steps`` on ``steps`` and collects the roadmap major
    step indices of every issue whose ``issue`` value is ``"stage_mismatch"``.
    Each step that belongs to one of those majors, and for which a stage spec
    exists, is replaced by an "unfilled" placeholder dict so that the slot is
    reported as a gap instead of keeping a poorly fitting library exercise.

    Args:
        cur: Passed through unchanged to ``detect_off_topic_steps``.
        steps: Current path steps; the input dicts are never mutated.
        roadmap_ctx: Supplies ``stage_specs`` and, optionally, ``roadmap`` with
            its ``major_steps`` (used for the placeholder's phase).
        goal_query: Passed through to ``detect_off_topic_steps``.
        semantic_brief: Passed to ``detect_off_topic_steps`` as ``brief``.

    Returns:
        ``(new_steps, new_unfilled)``. When nothing is flagged, the original
        ``steps`` list object and an empty list are returned. Otherwise
        ``new_steps`` holds shallow copies of the kept steps plus placeholders,
        and ``new_unfilled`` holds one ``(position, spec)`` pair per purged
        step, where ``position`` is the index of the first stage spec with the
        same major step index.
    """
    issues = detect_off_topic_steps(
        cur,
        steps,
        brief=semantic_brief,
        goal_query=goal_query,
    )
    purge_majors: Set[int] = set()
    for item in issues:
        if str(item.get("issue") or "") != "stage_mismatch":
            continue
        midx = item.get("roadmap_major_step_index")
        if midx is None:
            continue
        try:
            purge_majors.add(int(midx))
        except (TypeError, ValueError):
            continue
    if not purge_majors:
        return steps, []
    stage_specs = list(roadmap_ctx.stage_specs or [])
    spec_by_major = {int(s.major_step_index): s for s in stage_specs}
    # Position of the FIRST spec per major index, computed once instead of
    # re-scanning stage_specs for every purged step.
    spec_position: Dict[int, int] = {}
    for pos, sp in enumerate(stage_specs):
        spec_position.setdefault(int(sp.major_step_index), pos)
    major_by_index: Dict[int, MajorStep] = {}
    if roadmap_ctx.roadmap:
        major_by_index = {m.index: m for m in roadmap_ctx.roadmap.major_steps}
    new_unfilled: List[Tuple[int, StageSpecArtifact]] = []
    out: List[Dict[str, Any]] = []
    for raw in steps:
        step = dict(raw)
        midx = step.get("roadmap_major_step_index")
        # Same tolerance as the issue loop above: a malformed index must not
        # abort the purge; such a step is simply kept as-is.
        try:
            major_idx = int(midx) if midx is not None else None
        except (TypeError, ValueError):
            major_idx = None
        if major_idx is None or major_idx not in purge_majors:
            out.append(step)
            continue
        spec = spec_by_major.get(major_idx)
        if spec is None:
            # Without a stage spec there is nothing to report as unfilled.
            out.append(step)
            continue
        step_index = spec_position.get(major_idx, major_idx)
        major = major_by_index.get(major_idx)
        goal = (spec.learning_goal or step.get("roadmap_learning_goal") or "").strip()
        out.append(
            {
                "exercise_id": None,
                "variant_id": None,
                "title": goal or f"Slot {major_idx + 1}",
                "is_ai_proposal": False,
                "roadmap_major_step_index": major_idx,
                "roadmap_phase": major.phase if major else step.get("roadmap_phase"),
                "roadmap_learning_goal": goal or None,
                "roadmap_match_source": "unfilled",
                "slot_status": "unfilled",
                "reasons": ["Keine passende Bibliotheks-Übung für Stufen-Lernziel"],
            }
        )
        new_unfilled.append((step_index, spec))
    return out, new_unfilled
 def _merge_rematch_unfilled(
    roadmap_unfilled: List[Tuple[int, StageSpecArtifact]],
    rematch_new_unfilled: List[Tuple[int, StageSpecArtifact]],
@ -1401,6 +1473,16 @@ def _run_roadmap_rematch_loop(
    _track_rejected(off_topic_before_strip)
    _track_rejected(current_stripped)
    slot_assignment_history: Dict[int, Set[int]] = {}
    for raw in steps:
        midx = raw.get("roadmap_major_step_index")
        eid = raw.get("exercise_id")
        if midx is None or eid is None:
            continue
        try:
            slot_assignment_history.setdefault(int(midx), set()).add(int(eid))
        except (TypeError, ValueError):
            continue
    for round_idx in range(max_rounds):
        mini_qa = run_multistage_path_qa(
@ -1462,6 +1544,7 @@ def _run_roadmap_rematch_loop(
            rematch_reasons=rematch_reasons,
            match_slot_fn=_match_roadmap_slot,
            rejected_by_major=rejected_by_major,
            slot_assignment_history=slot_assignment_history,
        )
        rematch_rounds += 1
        for entry in round_log:
@ -1475,6 +1558,16 @@ def _run_roadmap_rematch_loop(
                    rejected_by_major.setdefault(int(midx), set()).add(int(rid))
                except (TypeError, ValueError):
                    pass
            new_eid = entry.get("new_exercise_id")
            if (
                str(entry.get("action") or "") == "replaced"
                and new_eid is not None
                and midx is not None
            ):
                try:
                    slot_assignment_history.setdefault(int(midx), set()).add(int(new_eid))
                except (TypeError, ValueError):
                    pass
        current_stripped = prune_stripped_after_rematch(current_stripped, round_log)
        roadmap_unfilled = _merge_rematch_unfilled(roadmap_unfilled, rematch_new_unfilled)
@ -1500,6 +1593,22 @@ def _run_roadmap_rematch_loop(
            goal_query=goal_query,
        )
    steps, purged_unfilled = _purge_stage_mismatch_roadmap_slots(
        cur,
        steps=steps,
        roadmap_ctx=roadmap_ctx,
        goal_query=goal_query,
        semantic_brief=semantic_brief,
    )
    if purged_unfilled:
        roadmap_unfilled = _merge_rematch_unfilled(roadmap_unfilled, purged_unfilled)
        off_topic_steps = detect_off_topic_steps(
            cur,
            steps,
            brief=semantic_brief,
            goal_query=goal_query,
        )
    return (
        steps,
        rematch_log,
--- a/backend/planning_exercise_semantics.py
+++ b/backend/planning_exercise_semantics.py
@ -865,6 +865,11 @@ def stage_focus_phrases_from_learning_goal(learning_goal: str) -> List[str]:
    tokens = _significant_stage_tokens(lg, strip_negated=True)
    phrases: List[str] = []
    norm_lg = _normalize_phrase(lg)
    tech_hit = _find_technique_in_text(norm_lg)
    if tech_hit:
        primary = tech_hit[0]
        if primary not in phrases:
            phrases.append(primary)
    if len(norm_lg) >= 8:
        phrases.append(norm_lg[:120])
    for i in range(len(tokens) - 1):
@ -879,14 +884,22 @@ def stage_focus_phrases_from_learning_goal(learning_goal: str) -> List[str]:
 def stage_refinement_criteria_from_learning_goal(learning_goal: str) -> List[str]:
    """Erfolgskriterien für Phase C — nur aussagekräftige Mehrwort-Phrasen."""
    lg = (learning_goal or "").strip()
    if len(lg) < 3:
        return []
    norm_lg = _normalize_phrase(lg)
    out: List[str] = []
-    for phrase in stage_focus_phrases_from_learning_goal(learning_goal):
+    if len(norm_lg) >= 15:
-        p = str(phrase or "").strip()
+        out.append(norm_lg[:120])
-        if not p:
+    tokens = _significant_stage_tokens(lg, strip_negated=True)
    for i in range(len(tokens) - 1):
        a, b = tokens[i], tokens[i + 1]
        if len(a) < 5 or len(b) < 5:
            continue
-        if " " in p or len(p) >= 12:
+        pair = f"{a} {b}"
-            out.append(p[:120])
+        if len(pair) >= 12 and pair not in out:
-    return out[:4]
+            out.append(pair)
    return out[:3]
 def exercise_title_matches_peer_stage_goal(
@ -1095,6 +1108,9 @@ def build_stage_match_brief(
    constraints = parse_stage_goal_constraints(lg)
    must: List[str] = []
    norm_lg = _normalize_phrase(lg)
    tech_hit = _find_technique_in_text(norm_lg)
    if tech_hit and tech_hit[0] not in must:
        must.insert(0, tech_hit[0])
    if primary_path and primary_path not in must:
        must.insert(0, primary_path[:120])
    for token in constraints.positive_tokens:
@ -1165,12 +1181,15 @@ def score_exercise_stage_fit(
        if part.lower().startswith("lernziel:"):
            lg_hint = part.split(":", 1)[-1].strip()
            break
    if not lg_hint:
        lg_hint = (stage_brief.retrieval_query or "").split("|")[0].strip()
    if not lg_hint:
        for mp in stage_brief.must_phrases or []:
            if mp and len(_normalize_phrase(mp)) >= 8:
                lg_hint = mp
                break
    focus_phrases = stage_focus_phrases_from_learning_goal(lg_hint) if lg_hint else []
    tech_hit = _find_technique_in_text(_normalize_phrase(lg_hint)) if lg_hint else None
    if not focus_phrases:
        focus_phrases = [
            t
@ -1185,6 +1204,16 @@ def score_exercise_stage_fit(
            score = min(1.0, score + bonus)
            if hits >= max(1, len(focus_phrases) // 2):
                reasons = ["Stufen-Schwerpunkte im Übungstext", *reasons]
        non_tech = [
            p
            for p in focus_phrases
            if not tech_hit or _normalize_phrase(p) != tech_hit[0]
        ]
        specific_hits = sum(1 for p in non_tech if _phrase_in_blob(p, blob))
        if tech_hit and _phrase_in_blob(tech_hit[0], blob) and specific_hits == 0:
            score = min(score, 0.16)
            if "Nur Technik-Bezug" not in reasons:
                reasons = ["Nur Technik-Bezug, Stufen-Schwerpunkte fehlen", *reasons]
    learning_goal_for_equiv = lg_hint or (stage_brief.must_phrases[0] if stage_brief.must_phrases else "")
    if learning_goal_for_equiv and exercise_title_equivalent_to_stage_goal(title, learning_goal_for_equiv):
        score = max(score, 0.42)
@ -1246,8 +1275,6 @@ def exercise_passes_stage_fit(
        learning_goal=lg,
        anti_patterns=anti_patterns,
    )
    stage_sem = stage_semantic_score
    if stage_sem is None:
    stage_sem, _ = score_exercise_stage_fit(
        title=title,
        summary=summary,
@ -1262,7 +1289,19 @@ def exercise_passes_stage_fit(
        threshold = _MIN_TITLE_EQUIV_SEMANTIC
    else:
        threshold = min_stage_semantic
-    return float(stage_sem or 0.0) >= threshold
+
    if float(stage_sem or 0.0) >= threshold:
        return True
    if relaxed and not title_equiv:
        focus = stage_focus_phrases_from_learning_goal(lg)
        tech = _find_technique_in_text(_normalize_phrase(lg))
        non_tech = [p for p in focus if not tech or _normalize_phrase(p) != tech[0]]
        specific_hits = sum(1 for p in non_tech if _phrase_in_blob(p, blob))
        if specific_hits >= 2 and float(stage_sem or 0.0) >= 0.14:
            return True
    return False
 def apply_stage_match_retrieval_weights(brief: PlanningSemanticBrief) -> Dict[str, float]:
@ -1539,16 +1578,7 @@ def pick_best_path_hit(
        chosen = _scan(strict=False)
        if chosen:
            return chosen
-        return _pick_roadmap_rank_fallback(
+        return None
            hits,
            used_exercise_ids,
            stage_learning_goal=stage_goal,
            stage_anti_patterns=stage_anti_patterns,
            path_primary_topic=path_primary_topic,
            path_technique_excludes=path_technique_excludes,
            stage_match_brief=stage_brief,
            peer_learning_goals=peer_learning_goals,
        )
    chosen = _scan(strict=False)
    if chosen:
--- a/backend/planning_path_rematch.py
+++ b/backend/planning_path_rematch.py
@ -116,6 +116,7 @@ def rematch_roadmap_slots(
    rematch_reasons: Mapping[int, str],
    match_slot_fn,
    rejected_by_major: Optional[Mapping[int, Set[int]]] = None,
    slot_assignment_history: Optional[Mapping[int, Set[int]]] = None,
 ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Tuple[int, StageSpecArtifact]]]:
    """
    Ersetzt nur betroffene Slots; andere Schritte und used-Set bleiben konsistent.
@ -180,6 +181,18 @@ def rematch_roadmap_slots(
        )
        reason = str(rematch_reasons.get(int(major_idx)) or "rematch_slot")
        if new_step:
            try:
                new_eid = int(new_step.get("exercise_id") or 0)
            except (TypeError, ValueError):
                new_eid = 0
            hist = (
                slot_assignment_history.get(int(major_idx), set())
                if slot_assignment_history
                else set()
            )
            if new_eid > 0 and new_eid in hist:
                new_step = None
        if new_step:
            steps_by_major[int(major_idx)] = new_step
            rematch_log.append(
--- a/backend/tests/test_planning_roadmap_stage_match.py
+++ b/backend/tests/test_planning_roadmap_stage_match.py
@ -270,8 +270,8 @@ def test_pick_roadmap_relaxed_for_non_technique_stage():
        {
            "id": 11,
            "title": "Adduktoren Dehnung am Boden",
-            "summary": "Flexibilität Hüfte",
+            "summary": "Flexibilität Hüfte, Adduktoren dehnen",
-            "goal": "Mobilität",
+            "goal": "Mobilität — Adduktoren dehnen",
            "score": 0.68,
            "semantic_score": 0.22,
            "stage_semantic_score": 0.22,
--- a/backend/version.py
+++ b/backend/version.py
@ -1,6 +1,6 @@
 # Shinkan Jinkendo Version Information
-APP_VERSION = "0.8.229"
+APP_VERSION = "0.8.230"
 BUILD_DATE = "2026-05-22"
 DB_SCHEMA_VERSION = "20260607090"
@ -38,7 +38,7 @@ MODULE_VERSIONS = {
    "skill_profiles": "1.0.0",  # Phase 3: gewichtetes Fähigkeiten-Profil + skill-discovery/suggestions
    "methods": "0.1.0",
    "exercises": "2.37.1",  # KI-Endpoints: feature_usage nach ai_calls consume
-    "planning_exercise_suggest": "0.23.4",  # Stufen-Match: Fallback mit Gate, Peer-Slot-Schutz, LG-Kandidaten-Filter
+    "planning_exercise_suggest": "0.23.5",  # Roadmap-Match strikt; stage_mismatch → unfilled + KI-Gap
    "training_units": "0.4.0",  # POST .../publish-to-framework: Ablauf aus geplanter Einheit → Rahmen-Slot-Blueprint
    "training_programs": "0.1.0",
    "planning": "0.15.0",  # Vorlagen: Strukturvorschau, Bearbeiten inkl. Split-Sessions + Beschreibung