From b2fbf6b4afba1d17b6d210babd16b38726a58871 Mon Sep 17 00:00:00 2001 From: Lars Date: Thu, 11 Jun 2026 12:45:53 +0200 Subject: [PATCH] Refactor Roadmap Step Annotation and Slot Assignment Logic - Updated `_annotate_roadmap_step` to change the condition for setting `slot_status` based on `roadmap_match_source`, improving clarity in slot assignment handling. - Removed the `_try_reconcile_slot_assignment` function to streamline the slot assignment process, as its logic is now integrated into the main flow. - Enhanced `_match_roadmap_slot` to conditionally preserve slot assignments based on exercise ID, ensuring better handling of existing assignments. - Improved the handling of semantic scores in `rank_visible_library_hits` to prioritize the best semantic fit, enhancing exercise retrieval accuracy. - Added tests to validate the new logic for title equivalence and semantic scoring, ensuring robustness in exercise selection processes. --- backend/planning_exercise_path_builder.py | 124 ++---------------- backend/planning_exercise_path_qa.py | 7 - backend/planning_exercise_retrieval.py | 27 +++- backend/planning_exercise_semantics.py | 19 ++- .../test_planning_roadmap_stage_match.py | 43 +++++- 5 files changed, 91 insertions(+), 129 deletions(-) diff --git a/backend/planning_exercise_path_builder.py b/backend/planning_exercise_path_builder.py index 1783275..143c804 100644 --- a/backend/planning_exercise_path_builder.py +++ b/backend/planning_exercise_path_builder.py @@ -816,7 +816,7 @@ def _annotate_roadmap_step( step["roadmap_match_source"] = "stage_spec" if step.get("exercise_id") is not None: step["slot_status"] = step.get("slot_status") or ( - "preserved" if step.get("roadmap_match_source") == "slot_reconciled" else "matched" + "preserved" if step.get("roadmap_match_source") == "slot_best_match" else "matched" ) else: step["slot_status"] = step.get("slot_status") or "unfilled" @@ -825,84 +825,6 @@ def _annotate_roadmap_step( return step -def _try_reconcile_slot_assignment( - cur, - *, - assignment: EvaluateStepPayload, - stage_spec: StageSpecArtifact, - major_step: Optional[MajorStep], - tenant: TenantContext, - progression_graph_id: Optional[int], - stage_match_brief: Optional[PlanningSemanticBrief], - stage_goal: str, - stage_anti: Optional[List[str]], - path_primary: str, - path_tech_excludes: Optional[List[str]], -) -> Optional[Dict[str, Any]]: - """ - Bestehende Slot-Zuordnung behalten, wenn sie noch zum Stufen-Lernziel passt. - - Validiert gegen dieselben Gates wie Match/QA (relaxed), inkl. Titel-Äquivalenz. - """ - from planning_exercise_semantics import ( - exercise_passes_stage_fit, - exercise_title_equivalent_to_stage_goal, - ) - - step = _path_step_from_slot_assignment( - cur, - assignment=assignment, - stage_spec=stage_spec, - major_step=major_step, - tenant=tenant, - progression_graph_id=progression_graph_id, - ) - if not step: - return None - - title = str(step.get("title") or "").strip() - summary = str(step.get("summary") or "").strip() - goal = "" - cur.execute("SELECT goal FROM exercises WHERE id = %s", (int(step["exercise_id"]),)) - grow = cur.fetchone() - if grow: - goal = str(grow.get("goal") or "").strip() - - lg = (stage_goal or stage_spec.learning_goal or "").strip() - if exercise_title_equivalent_to_stage_goal(title, lg): - step["roadmap_match_source"] = "slot_reconciled" - step["slot_status"] = "preserved" - step["reasons"] = ["Bestehende Zuordnung (Titel = Lernziel)"] + list(step.get("reasons") or [])[:2] - return _annotate_roadmap_step( - step, - stage_spec=stage_spec, - major_step=major_step, - anti_patterns_override=stage_anti, - ) - - if exercise_passes_stage_fit( - learning_goal=lg, - title=title, - summary=summary, - goal=goal, - stage_brief=stage_match_brief, - anti_patterns=stage_anti, - path_primary_topic=path_primary or None, - path_technique_excludes=path_tech_excludes, - relaxed=True, - ): - step["roadmap_match_source"] = "slot_reconciled" - step["slot_status"] = "preserved" - step["reasons"] = ["Bestehende Zuordnung (Stufen-Fit)"] + list(step.get("reasons") or [])[:2] - return _annotate_roadmap_step( - step, - stage_spec=stage_spec, - major_step=major_step, - anti_patterns_override=stage_anti, - ) - return None - - def _stage_validation_context_for_spec( cur, *, @@ -1138,7 +1060,16 @@ def _match_roadmap_slot( skill_expectations=skill_exp_api, anti_patterns_override=stage_anti, ) - step["slot_status"] = "matched" + if ( + slot_priority_exercise_id is not None + and int(step["exercise_id"]) == int(slot_priority_exercise_id) + ): + step["slot_status"] = "preserved" + step["roadmap_match_source"] = "slot_best_match" + step["reasons"] = ["Bester Treffer (bestehende Zuordnung)"] + list(step.get("reasons") or [])[:2] + else: + step["slot_status"] = "matched" + step["roadmap_match_source"] = "stage_spec" return step, None @@ -1307,39 +1238,6 @@ def _build_steps_roadmap_first( slot_priority_id: Optional[int] = None if major_idx in assignments: - ctx = _stage_validation_context_for_spec( - cur, - body=body, - goal_query=goal_query, - semantic_brief=semantic_brief, - path_target_profile=path_target_profile, - roadmap_ctx=roadmap_ctx, - stage_spec=stage_spec, - step_index=step_index, - stage_count=stage_count, - major=major, - ) - reconciled = _try_reconcile_slot_assignment( - cur, - assignment=assignments[major_idx], - stage_spec=stage_spec, - major_step=major, - tenant=tenant, - progression_graph_id=body.progression_graph_id, - stage_match_brief=ctx["stage_match_brief"], - stage_goal=ctx["stage_goal"], - stage_anti=ctx["stage_anti"], - path_primary=ctx["path_primary"], - path_tech_excludes=ctx["path_tech_excludes"], - ) - if reconciled: - steps.append(reconciled) - eid = int(reconciled["exercise_id"]) - used.add(eid) - planned_ids.append(eid) - anchor_id = eid - anchor_variant_id = reconciled.get("variant_id") - continue try: slot_priority_id = int(assignments[major_idx].exercise_id) except (TypeError, ValueError): diff --git a/backend/planning_exercise_path_qa.py b/backend/planning_exercise_path_qa.py index 4cb3c85..d3e50a7 100644 --- a/backend/planning_exercise_path_qa.py +++ b/backend/planning_exercise_path_qa.py @@ -435,14 +435,7 @@ def detect_off_topic_steps( for idx, step in enumerate(steps): if step.get("is_ai_proposal") or step.get("exercise_id") is None: continue - stage_goal_early = (step.get("roadmap_learning_goal") or "").strip() bundle = _load_exercise_text_bundle(cur, int(step["exercise_id"])) - from planning_exercise_semantics import exercise_title_equivalent_to_stage_goal - - if stage_goal_early and exercise_title_equivalent_to_stage_goal( - bundle["title"], stage_goal_early - ): - continue blob = _blob_from_fields( bundle["title"], bundle["summary"], diff --git a/backend/planning_exercise_retrieval.py b/backend/planning_exercise_retrieval.py index deb0d13..085e9e7 100644 --- a/backend/planning_exercise_retrieval.py +++ b/backend/planning_exercise_retrieval.py @@ -431,8 +431,30 @@ def rank_visible_library_hits( step_phase=step_phase, ) + rank_stage_sem = stage_semantic_score + stage_lg = (stage_learning_goal or "").strip() + if roadmap_stage_match and stage_lg: + raw_brief = build_stage_match_brief( + learning_goal=stage_lg, + anti_patterns=pack.get("stage_anti_patterns"), + phase=step_phase, + ) + raw_sem, raw_reasons = score_exercise_stage_fit( + title=title_s, + summary=summary_s, + goal=goal_s, + variant_names=variants_by_ex.get(eid, []), + stage_brief=raw_brief, + step_phase=step_phase, + ) + rank_stage_sem = max(stage_semantic_score, raw_sem) + if raw_sem > stage_semantic_score and raw_reasons: + for rr in raw_reasons: + if rr not in stage_semantic_reasons: + stage_semantic_reasons.append(rr) + effective_semantic = ( - stage_semantic_score + rank_stage_sem if roadmap_stage_match and stage_match_brief else semantic_score ) @@ -461,7 +483,7 @@ def rank_visible_library_hits( summary=summary_s, goal=goal_s, stage_brief=stage_match_brief, - stage_semantic_score=stage_semantic_score, + stage_semantic_score=rank_stage_sem, anti_patterns=pack.get("stage_anti_patterns"), step_phase=step_phase, path_primary_topic=pack.get("path_primary_topic"), @@ -528,6 +550,7 @@ def rank_visible_library_hits( "reasons": reasons, "semantic_score": round(semantic_score, 4), "stage_semantic_score": round(stage_semantic_score, 4), + "stage_rank_semantic": round(rank_stage_sem, 4), "goal": goal_s, } ) diff --git a/backend/planning_exercise_semantics.py b/backend/planning_exercise_semantics.py index ecb9ac9..cfc79d6 100644 --- a/backend/planning_exercise_semantics.py +++ b/backend/planning_exercise_semantics.py @@ -954,6 +954,7 @@ def enrich_brief_with_path_constraints( _MIN_STAGE_FIT_SEMANTIC = 0.30 _MIN_STAGE_FIT_RELAXED = 0.20 +_MIN_TITLE_EQUIV_SEMANTIC = 0.15 def build_stage_match_brief( @@ -1101,8 +1102,7 @@ def exercise_passes_stage_fit( if constraints.exclude_phrases and _blob_matches_stage_excludes(blob, constraints.exclude_phrases): return False - if exercise_title_equivalent_to_stage_goal(title, learning_goal or lg): - return True + title_equiv = exercise_title_equivalent_to_stage_goal(title, learning_goal or lg) primary_path = (path_primary_topic or "").strip() if not primary_path and lg: @@ -1114,7 +1114,7 @@ def exercise_passes_stage_fit( for item in technique_sibling_excludes(primary_path): if item not in tech_excludes: tech_excludes.append(item) - if primary_path and not exercise_passes_technique_path_scope( + if primary_path and not title_equiv and not exercise_passes_technique_path_scope( primary_topic=primary_path, title=title, summary=summary, @@ -1139,7 +1139,12 @@ def exercise_passes_stage_fit( step_phase=step_phase, ) - threshold = _MIN_STAGE_FIT_RELAXED if relaxed else min_stage_semantic + if relaxed: + threshold = _MIN_STAGE_FIT_RELAXED + elif title_equiv: + threshold = _MIN_TITLE_EQUIV_SEMANTIC + else: + threshold = min_stage_semantic return float(stage_sem or 0.0) >= threshold @@ -1291,7 +1296,11 @@ def pick_best_path_hit( summary = str(hit.get("summary") or "") goal_text = str(hit.get("goal") or hit.get("exercise_goal") or "") sem = float(hit.get("semantic_score") or 0.0) - stage_sem = float(hit.get("stage_semantic_score") or sem) + stage_sem = float( + hit.get("stage_rank_semantic") + or hit.get("stage_semantic_score") + or sem + ) if roadmap_stage_match and stage_goal: if not exercise_passes_stage_fit( diff --git a/backend/tests/test_planning_roadmap_stage_match.py b/backend/tests/test_planning_roadmap_stage_match.py index e620c95..5da8869 100644 --- a/backend/tests/test_planning_roadmap_stage_match.py +++ b/backend/tests/test_planning_roadmap_stage_match.py @@ -354,18 +354,57 @@ def test_title_equivalent_to_stage_goal(): assert not exercise_title_equivalent_to_stage_goal("Kumite", "Hüftmobilität für Mae Geri") -def test_stage_fit_passes_for_title_equivalent_despite_missing_path_technique(): +def test_stage_fit_passes_for_title_equivalent_with_sufficient_semantic_score(): stage_goal = "Koordination Absprung ohne Kick" assert exercise_passes_stage_fit( learning_goal=stage_goal, title=stage_goal, - summary="", + summary="Absprung und Landung koordinieren", goal="", path_primary_topic="mawashi geri", path_technique_excludes=["kumite"], + stage_semantic_score=0.42, ) +def test_pick_best_prefers_semantic_fit_over_coincidental_title(): + stage_goal = "Hüftmobilität für Mawashi Geri" + stage_brief = build_stage_match_brief(learning_goal=stage_goal) + hits = [ + { + "id": 1, + "title": "Hüftmobilität für Mawashi Geri", + "summary": "allgemeine Aufwärmung", + "goal": "", + "score": 0.9, + "semantic_score": 0.12, + "stage_semantic_score": 0.12, + "stage_rank_semantic": 0.35, + }, + { + "id": 2, + "title": "Mawashi Hüftmobilität und Adduktoren", + "summary": "Dehnung Hüfte für Rundtritt", + "goal": "Mawashi Geri Hüftbeweglichkeit", + "score": 0.72, + "semantic_score": 0.58, + "stage_semantic_score": 0.58, + "stage_rank_semantic": 0.62, + }, + ] + chosen = pick_best_path_hit( + hits, + set(), + stage_learning_goal=stage_goal, + roadmap_stage_match=True, + stage_match_brief=stage_brief, + path_primary_topic="mawashi geri", + path_technique_excludes=technique_sibling_excludes("mawashi geri"), + ) + assert chosen is not None + assert int(chosen["id"]) == 2 + + def test_pick_roadmap_relaxed_with_path_primary_when_strict_fails(): """Bestehende Graph-Übungen: relaxed Gate auch bei gesetztem path_primary_topic.""" stage_goal = "Hüftmobilität für Mawashi Geri"