From d448c3191f30fadd925c3814c7c26b64a25079bd Mon Sep 17 00:00:00 2001 From: Lars Date: Fri, 12 Jun 2026 07:57:19 +0200 Subject: [PATCH] Enhance Stage Mismatch Handling and Roadmap Slot Purging - Introduced `_purge_stage_mismatch_roadmap_slots` to clear slots with persistent stage mismatches, improving the relevance of exercise suggestions. - Updated `collect_gap_fill_specs` to handle stage mismatch issues more effectively, providing clearer rationale and title hints for off-topic exercises. - Modified `_filter_learning_goal_candidate_ids` to enforce stricter filtering criteria, ensuring only relevant candidates are considered. - Enhanced `rematch_roadmap_slots` to incorporate slot assignment history, preventing conflicts with previously assigned exercises. - Bumped version to 0.8.230 to reflect the new features and improvements. --- backend/planning_exercise_path_ai_fill.py | 22 +++- backend/planning_exercise_path_builder.py | 111 +++++++++++++++++- backend/planning_exercise_semantics.py | 82 +++++++++---- backend/planning_path_rematch.py | 13 ++ .../test_planning_roadmap_stage_match.py | 4 +- backend/version.py | 4 +- 6 files changed, 201 insertions(+), 35 deletions(-) diff --git a/backend/planning_exercise_path_ai_fill.py b/backend/planning_exercise_path_ai_fill.py index 81373bf..a679584 100644 --- a/backend/planning_exercise_path_ai_fill.py +++ b/backend/planning_exercise_path_ai_fill.py @@ -425,9 +425,22 @@ def collect_gap_fill_specs( step_a, step_b = _step_neighbors_at_index(steps, idx) phase = ot.get("expected_phase") or "vertiefung" insert_after = max(idx - 1, -1) + stage_goal = str(ot.get("roadmap_learning_goal") or "").strip() + if str(ot.get("issue") or "") == "stage_mismatch" and stage_goal: + title_hint = stage_goal[:120] + rationale = ( + f"Keine passende Bibliotheks-Übung für Stufen-Lernziel „{stage_goal[:100]}“." + ) + sketch_rationale = ( + f"Slot braucht Übung passend zu: {stage_goal[:200]}" + ) + else: + title_hint = f"{topic} — {phase} (Ersatz für themenfremden Schritt)" + rationale = f"Schritt „{ot.get('title')}“ passt nicht zum Pfad-Thema." + sketch_rationale = f"Ersetzt themenfremden Schritt „{ot.get('title')}“." add( { - "source": "off_topic", + "source": "off_topic" if ot.get("issue") != "stage_mismatch" else "stage_mismatch", "insert_after_index": insert_after, "replace_step_index": idx, "roadmap_major_step_index": major_idx, @@ -435,18 +448,19 @@ def collect_gap_fill_specs( "expected_phase": phase, "off_topic_title": ot.get("title"), "off_topic_exercise_id": ot.get("exercise_id"), + "roadmap_learning_goal": stage_goal or None, }, "phase": phase, - "title_hint": f"{topic} — {phase} (Ersatz für themenfremden Schritt)", + "title_hint": title_hint, "sketch": _default_sketch( goal_query=goal_query, brief=brief, step_a=step_a, step_b=step_b, phase=str(phase), - rationale=f"Ersetzt themenfremden Schritt „{ot.get('title')}“.", + rationale=sketch_rationale, ), - "rationale": f"Schritt „{ot.get('title')}“ passt nicht zum Pfad-Thema.", + "rationale": rationale, } ) diff --git a/backend/planning_exercise_path_builder.py b/backend/planning_exercise_path_builder.py index 79c0610..384475a 100644 --- a/backend/planning_exercise_path_builder.py +++ b/backend/planning_exercise_path_builder.py @@ -269,7 +269,7 @@ def _filter_learning_goal_candidate_ids( anti_patterns=stage_anti, path_primary_topic=path_primary or None, path_technique_excludes=path_tech_excludes, - relaxed=True, + relaxed=False, ): out.append(eid) return out @@ -1322,6 +1322,78 @@ def _normalize_roadmap_steps_coverage( return out +def _purge_stage_mismatch_roadmap_slots( + cur, + *, + steps: List[Dict[str, Any]], + roadmap_ctx: ProgressionRoadmapContext, + goal_query: str, + semantic_brief: PlanningSemanticBrief, +) -> Tuple[List[Dict[str, Any]], List[Tuple[int, StageSpecArtifact]]]: + """Leert Slots mit persistentem stage_mismatch — KI-Gap statt schlechter Bibliotheks-Übung.""" + issues = detect_off_topic_steps( + cur, + steps, + brief=semantic_brief, + goal_query=goal_query, + ) + purge_majors: Set[int] = set() + for item in issues: + if str(item.get("issue") or "") != "stage_mismatch": + continue + midx = item.get("roadmap_major_step_index") + if midx is None: + continue + try: + purge_majors.add(int(midx)) + except (TypeError, ValueError): + continue + if not purge_majors: + return steps, [] + + stage_specs = list(roadmap_ctx.stage_specs or []) + spec_by_major = {int(s.major_step_index): s for s in stage_specs} + major_by_index: Dict[int, MajorStep] = {} + if roadmap_ctx.roadmap: + major_by_index = {m.index: m for m in roadmap_ctx.roadmap.major_steps} + + new_unfilled: List[Tuple[int, StageSpecArtifact]] = [] + out: List[Dict[str, Any]] = [] + for raw in steps: + step = dict(raw) + midx = step.get("roadmap_major_step_index") + if midx is None or int(midx) not in purge_majors: + out.append(step) + continue + major_idx = int(midx) + spec = spec_by_major.get(major_idx) + if spec is None: + out.append(step) + continue + step_index = next( + (i for i, sp in enumerate(stage_specs) if int(sp.major_step_index) == major_idx), + major_idx, + ) + major = major_by_index.get(major_idx) + goal = (spec.learning_goal or step.get("roadmap_learning_goal") or "").strip() + out.append( + { + "exercise_id": None, + "variant_id": None, + "title": goal or f"Slot {major_idx + 1}", + "is_ai_proposal": False, + "roadmap_major_step_index": major_idx, + "roadmap_phase": major.phase if major else step.get("roadmap_phase"), + "roadmap_learning_goal": goal or None, + "roadmap_match_source": "unfilled", + "slot_status": "unfilled", + "reasons": ["Keine passende Bibliotheks-Übung für Stufen-Lernziel"], + } + ) + new_unfilled.append((step_index, spec)) + return out, new_unfilled + + def _merge_rematch_unfilled( roadmap_unfilled: List[Tuple[int, StageSpecArtifact]], rematch_new_unfilled: List[Tuple[int, StageSpecArtifact]], @@ -1401,6 +1473,16 @@ def _run_roadmap_rematch_loop( _track_rejected(off_topic_before_strip) _track_rejected(current_stripped) + slot_assignment_history: Dict[int, Set[int]] = {} + for raw in steps: + midx = raw.get("roadmap_major_step_index") + eid = raw.get("exercise_id") + if midx is None or eid is None: + continue + try: + slot_assignment_history.setdefault(int(midx), set()).add(int(eid)) + except (TypeError, ValueError): + continue for round_idx in range(max_rounds): mini_qa = run_multistage_path_qa( @@ -1462,6 +1544,7 @@ def _run_roadmap_rematch_loop( rematch_reasons=rematch_reasons, match_slot_fn=_match_roadmap_slot, rejected_by_major=rejected_by_major, + slot_assignment_history=slot_assignment_history, ) rematch_rounds += 1 for entry in round_log: @@ -1475,6 +1558,16 @@ def _run_roadmap_rematch_loop( rejected_by_major.setdefault(int(midx), set()).add(int(rid)) except (TypeError, ValueError): pass + new_eid = entry.get("new_exercise_id") + if ( + str(entry.get("action") or "") == "replaced" + and new_eid is not None + and midx is not None + ): + try: + slot_assignment_history.setdefault(int(midx), set()).add(int(new_eid)) + except (TypeError, ValueError): + pass current_stripped = prune_stripped_after_rematch(current_stripped, round_log) roadmap_unfilled = _merge_rematch_unfilled(roadmap_unfilled, rematch_new_unfilled) @@ -1500,6 +1593,22 @@ def _run_roadmap_rematch_loop( goal_query=goal_query, ) + steps, purged_unfilled = _purge_stage_mismatch_roadmap_slots( + cur, + steps=steps, + roadmap_ctx=roadmap_ctx, + goal_query=goal_query, + semantic_brief=semantic_brief, + ) + if purged_unfilled: + roadmap_unfilled = _merge_rematch_unfilled(roadmap_unfilled, purged_unfilled) + off_topic_steps = detect_off_topic_steps( + cur, + steps, + brief=semantic_brief, + goal_query=goal_query, + ) + return ( steps, rematch_log, diff --git a/backend/planning_exercise_semantics.py b/backend/planning_exercise_semantics.py index 7fe9521..c0433f7 100644 --- a/backend/planning_exercise_semantics.py +++ b/backend/planning_exercise_semantics.py @@ -865,6 +865,11 @@ def stage_focus_phrases_from_learning_goal(learning_goal: str) -> List[str]: tokens = _significant_stage_tokens(lg, strip_negated=True) phrases: List[str] = [] norm_lg = _normalize_phrase(lg) + tech_hit = _find_technique_in_text(norm_lg) + if tech_hit: + primary = tech_hit[0] + if primary not in phrases: + phrases.append(primary) if len(norm_lg) >= 8: phrases.append(norm_lg[:120]) for i in range(len(tokens) - 1): @@ -879,14 +884,22 @@ def stage_focus_phrases_from_learning_goal(learning_goal: str) -> List[str]: def stage_refinement_criteria_from_learning_goal(learning_goal: str) -> List[str]: """Erfolgskriterien für Phase C — nur aussagekräftige Mehrwort-Phrasen.""" + lg = (learning_goal or "").strip() + if len(lg) < 3: + return [] + norm_lg = _normalize_phrase(lg) out: List[str] = [] - for phrase in stage_focus_phrases_from_learning_goal(learning_goal): - p = str(phrase or "").strip() - if not p: + if len(norm_lg) >= 15: + out.append(norm_lg[:120]) + tokens = _significant_stage_tokens(lg, strip_negated=True) + for i in range(len(tokens) - 1): + a, b = tokens[i], tokens[i + 1] + if len(a) < 5 or len(b) < 5: continue - if " " in p or len(p) >= 12: - out.append(p[:120]) - return out[:4] + pair = f"{a} {b}" + if len(pair) >= 12 and pair not in out: + out.append(pair) + return out[:3] def exercise_title_matches_peer_stage_goal( @@ -1095,6 +1108,9 @@ def build_stage_match_brief( constraints = parse_stage_goal_constraints(lg) must: List[str] = [] norm_lg = _normalize_phrase(lg) + tech_hit = _find_technique_in_text(norm_lg) + if tech_hit and tech_hit[0] not in must: + must.insert(0, tech_hit[0]) if primary_path and primary_path not in must: must.insert(0, primary_path[:120]) for token in constraints.positive_tokens: @@ -1165,12 +1181,15 @@ def score_exercise_stage_fit( if part.lower().startswith("lernziel:"): lg_hint = part.split(":", 1)[-1].strip() break + if not lg_hint: + lg_hint = (stage_brief.retrieval_query or "").split("|")[0].strip() if not lg_hint: for mp in stage_brief.must_phrases or []: if mp and len(_normalize_phrase(mp)) >= 8: lg_hint = mp break focus_phrases = stage_focus_phrases_from_learning_goal(lg_hint) if lg_hint else [] + tech_hit = _find_technique_in_text(_normalize_phrase(lg_hint)) if lg_hint else None if not focus_phrases: focus_phrases = [ t @@ -1185,6 +1204,16 @@ def score_exercise_stage_fit( score = min(1.0, score + bonus) if hits >= max(1, len(focus_phrases) // 2): reasons = ["Stufen-Schwerpunkte im Übungstext", *reasons] + non_tech = [ + p + for p in focus_phrases + if not tech_hit or _normalize_phrase(p) != tech_hit[0] + ] + specific_hits = sum(1 for p in non_tech if _phrase_in_blob(p, blob)) + if tech_hit and _phrase_in_blob(tech_hit[0], blob) and specific_hits == 0: + score = min(score, 0.16) + if "Nur Technik-Bezug" not in reasons: + reasons = ["Nur Technik-Bezug, Stufen-Schwerpunkte fehlen", *reasons] learning_goal_for_equiv = lg_hint or (stage_brief.must_phrases[0] if stage_brief.must_phrases else "") if learning_goal_for_equiv and exercise_title_equivalent_to_stage_goal(title, learning_goal_for_equiv): score = max(score, 0.42) @@ -1246,15 +1275,13 @@ def exercise_passes_stage_fit( learning_goal=lg, anti_patterns=anti_patterns, ) - stage_sem = stage_semantic_score - if stage_sem is None: - stage_sem, _ = score_exercise_stage_fit( - title=title, - summary=summary, - goal=goal, - stage_brief=brief, - step_phase=step_phase, - ) + stage_sem, _ = score_exercise_stage_fit( + title=title, + summary=summary, + goal=goal, + stage_brief=brief, + step_phase=step_phase, + ) if relaxed: threshold = _MIN_STAGE_FIT_RELAXED @@ -1262,7 +1289,19 @@ def exercise_passes_stage_fit( threshold = _MIN_TITLE_EQUIV_SEMANTIC else: threshold = min_stage_semantic - return float(stage_sem or 0.0) >= threshold + + if float(stage_sem or 0.0) >= threshold: + return True + + if relaxed and not title_equiv: + focus = stage_focus_phrases_from_learning_goal(lg) + tech = _find_technique_in_text(_normalize_phrase(lg)) + non_tech = [p for p in focus if not tech or _normalize_phrase(p) != tech[0]] + specific_hits = sum(1 for p in non_tech if _phrase_in_blob(p, blob)) + if specific_hits >= 2 and float(stage_sem or 0.0) >= 0.14: + return True + + return False def apply_stage_match_retrieval_weights(brief: PlanningSemanticBrief) -> Dict[str, float]: @@ -1539,16 +1578,7 @@ def pick_best_path_hit( chosen = _scan(strict=False) if chosen: return chosen - return _pick_roadmap_rank_fallback( - hits, - used_exercise_ids, - stage_learning_goal=stage_goal, - stage_anti_patterns=stage_anti_patterns, - path_primary_topic=path_primary_topic, - path_technique_excludes=path_technique_excludes, - stage_match_brief=stage_brief, - peer_learning_goals=peer_learning_goals, - ) + return None chosen = _scan(strict=False) if chosen: diff --git a/backend/planning_path_rematch.py b/backend/planning_path_rematch.py index 9adcb5c..dc042ee 100644 --- a/backend/planning_path_rematch.py +++ b/backend/planning_path_rematch.py @@ -116,6 +116,7 @@ def rematch_roadmap_slots( rematch_reasons: Mapping[int, str], match_slot_fn, rejected_by_major: Optional[Mapping[int, Set[int]]] = None, + slot_assignment_history: Optional[Mapping[int, Set[int]]] = None, ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Tuple[int, StageSpecArtifact]]]: """ Ersetzt nur betroffene Slots; andere Schritte und used-Set bleiben konsistent. @@ -180,6 +181,18 @@ def rematch_roadmap_slots( ) reason = str(rematch_reasons.get(int(major_idx)) or "rematch_slot") + if new_step: + try: + new_eid = int(new_step.get("exercise_id") or 0) + except (TypeError, ValueError): + new_eid = 0 + hist = ( + slot_assignment_history.get(int(major_idx), set()) + if slot_assignment_history + else set() + ) + if new_eid > 0 and new_eid in hist: + new_step = None if new_step: steps_by_major[int(major_idx)] = new_step rematch_log.append( diff --git a/backend/tests/test_planning_roadmap_stage_match.py b/backend/tests/test_planning_roadmap_stage_match.py index 21ae1c2..442505d 100644 --- a/backend/tests/test_planning_roadmap_stage_match.py +++ b/backend/tests/test_planning_roadmap_stage_match.py @@ -270,8 +270,8 @@ def test_pick_roadmap_relaxed_for_non_technique_stage(): { "id": 11, "title": "Adduktoren Dehnung am Boden", - "summary": "Flexibilität Hüfte", - "goal": "Mobilität", + "summary": "Flexibilität Hüfte, Adduktoren dehnen", + "goal": "Mobilität — Adduktoren dehnen", "score": 0.68, "semantic_score": 0.22, "stage_semantic_score": 0.22, diff --git a/backend/version.py b/backend/version.py index aea1e0c..17b1603 100644 --- a/backend/version.py +++ b/backend/version.py @@ -1,6 +1,6 @@ # Shinkan Jinkendo Version Information -APP_VERSION = "0.8.229" +APP_VERSION = "0.8.230" BUILD_DATE = "2026-05-22" DB_SCHEMA_VERSION = "20260607090" @@ -38,7 +38,7 @@ MODULE_VERSIONS = { "skill_profiles": "1.0.0", # Phase 3: gewichtetes Fähigkeiten-Profil + skill-discovery/suggestions "methods": "0.1.0", "exercises": "2.37.1", # KI-Endpoints: feature_usage nach ai_calls consume - "planning_exercise_suggest": "0.23.4", # Stufen-Match: Fallback mit Gate, Peer-Slot-Schutz, LG-Kandidaten-Filter + "planning_exercise_suggest": "0.23.5", # Roadmap-Match strikt; stage_mismatch → unfilled + KI-Gap "training_units": "0.4.0", # POST .../publish-to-framework: Ablauf aus geplanter Einheit → Rahmen-Slot-Blueprint "training_programs": "0.1.0", "planning": "0.15.0", # Vorlagen: Strukturvorschau, Bearbeiten inkl. Split-Sessions + Beschreibung