From b2fbf6b4afba1d17b6d210babd16b38726a58871 Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Thu, 11 Jun 2026 12:45:53 +0200
Subject: [PATCH] Refactor Roadmap Step Annotation and Slot Assignment Logic

- Updated `_annotate_roadmap_step` so that a filled step defaults to `slot_status = "preserved"` when `roadmap_match_source` is `slot_best_match` (previously `slot_reconciled`); all other filled steps still default to `matched`.
- Removed `_try_reconcile_slot_assignment` and its call in `_build_steps_roadmap_first`: an existing slot assignment no longer short-circuits matching; its exercise ID is only kept as `slot_priority_id` for the regular matching flow.
- Enhanced `_match_roadmap_slot` to mark a step as `preserved` (with `roadmap_match_source = "slot_best_match"`) when the selected exercise ID equals `slot_priority_exercise_id`; otherwise the step is marked `matched` with source `stage_spec`.
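- Improved semantic scoring in `rank_visible_library_hits`: for roadmap stage matching, each exercise is additionally scored against a brief built from the raw stage learning goal, and the higher of the two stage scores is used as the effective stage score and exposed as `stage_rank_semantic` (which `pick_best_path_hit` now prefers over `stage_semantic_score`).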
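- Title equivalence with the stage goal no longer passes `exercise_passes_stage_fit` or skips `detect_off_topic_steps` on its own; it now only bypasses the technique-path scope check and lowers the non-relaxed threshold to `_MIN_TITLE_EQUIV_SEMANTIC` (0.15).
- Updated and added tests to validate the new title-equivalence and semantic-scoring logic, ensuring robustness in exercise selection.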
---
 backend/planning_exercise_path_builder.py     | 124 ++----------------
 backend/planning_exercise_path_qa.py          |   7 -
 backend/planning_exercise_retrieval.py        |  27 +++-
 backend/planning_exercise_semantics.py        |  19 ++-
 .../test_planning_roadmap_stage_match.py      |  43 +++++-
 5 files changed, 91 insertions(+), 129 deletions(-)

diff --git a/backend/planning_exercise_path_builder.py b/backend/planning_exercise_path_builder.py
index 1783275..143c804 100644
--- a/backend/planning_exercise_path_builder.py
+++ b/backend/planning_exercise_path_builder.py
@@ -816,7 +816,7 @@ def _annotate_roadmap_step(
         step["roadmap_match_source"] = "stage_spec"
     if step.get("exercise_id") is not None:
         step["slot_status"] = step.get("slot_status") or (
-            "preserved" if step.get("roadmap_match_source") == "slot_reconciled" else "matched"
+            "preserved" if step.get("roadmap_match_source") == "slot_best_match" else "matched"
         )
     else:
         step["slot_status"] = step.get("slot_status") or "unfilled"
@@ -825,84 +825,6 @@ def _annotate_roadmap_step(
     return step
 
 
-def _try_reconcile_slot_assignment(
-    cur,
-    *,
-    assignment: EvaluateStepPayload,
-    stage_spec: StageSpecArtifact,
-    major_step: Optional[MajorStep],
-    tenant: TenantContext,
-    progression_graph_id: Optional[int],
-    stage_match_brief: Optional[PlanningSemanticBrief],
-    stage_goal: str,
-    stage_anti: Optional[List[str]],
-    path_primary: str,
-    path_tech_excludes: Optional[List[str]],
-) -> Optional[Dict[str, Any]]:
-    """
-    Bestehende Slot-Zuordnung behalten, wenn sie noch zum Stufen-Lernziel passt.
-
-    Validiert gegen dieselben Gates wie Match/QA (relaxed), inkl. Titel-Äquivalenz.
-    """
-    from planning_exercise_semantics import (
-        exercise_passes_stage_fit,
-        exercise_title_equivalent_to_stage_goal,
-    )
-
-    step = _path_step_from_slot_assignment(
-        cur,
-        assignment=assignment,
-        stage_spec=stage_spec,
-        major_step=major_step,
-        tenant=tenant,
-        progression_graph_id=progression_graph_id,
-    )
-    if not step:
-        return None
-
-    title = str(step.get("title") or "").strip()
-    summary = str(step.get("summary") or "").strip()
-    goal = ""
-    cur.execute("SELECT goal FROM exercises WHERE id = %s", (int(step["exercise_id"]),))
-    grow = cur.fetchone()
-    if grow:
-        goal = str(grow.get("goal") or "").strip()
-
-    lg = (stage_goal or stage_spec.learning_goal or "").strip()
-    if exercise_title_equivalent_to_stage_goal(title, lg):
-        step["roadmap_match_source"] = "slot_reconciled"
-        step["slot_status"] = "preserved"
-        step["reasons"] = ["Bestehende Zuordnung (Titel = Lernziel)"] + list(step.get("reasons") or [])[:2]
-        return _annotate_roadmap_step(
-            step,
-            stage_spec=stage_spec,
-            major_step=major_step,
-            anti_patterns_override=stage_anti,
-        )
-
-    if exercise_passes_stage_fit(
-        learning_goal=lg,
-        title=title,
-        summary=summary,
-        goal=goal,
-        stage_brief=stage_match_brief,
-        anti_patterns=stage_anti,
-        path_primary_topic=path_primary or None,
-        path_technique_excludes=path_tech_excludes,
-        relaxed=True,
-    ):
-        step["roadmap_match_source"] = "slot_reconciled"
-        step["slot_status"] = "preserved"
-        step["reasons"] = ["Bestehende Zuordnung (Stufen-Fit)"] + list(step.get("reasons") or [])[:2]
-        return _annotate_roadmap_step(
-            step,
-            stage_spec=stage_spec,
-            major_step=major_step,
-            anti_patterns_override=stage_anti,
-        )
-    return None
-
-
 def _stage_validation_context_for_spec(
     cur,
     *,
@@ -1138,7 +1060,16 @@ def _match_roadmap_slot(
         skill_expectations=skill_exp_api,
         anti_patterns_override=stage_anti,
     )
-    step["slot_status"] = "matched"
+    if (
+        slot_priority_exercise_id is not None
+        and int(step["exercise_id"]) == int(slot_priority_exercise_id)
+    ):
+        step["slot_status"] = "preserved"
+        step["roadmap_match_source"] = "slot_best_match"
+        step["reasons"] = ["Bester Treffer (bestehende Zuordnung)"] + list(step.get("reasons") or [])[:2]
+    else:
+        step["slot_status"] = "matched"
+        step["roadmap_match_source"] = "stage_spec"
     return step, None
 
 
@@ -1307,39 +1238,6 @@ def _build_steps_roadmap_first(
         slot_priority_id: Optional[int] = None
 
         if major_idx in assignments:
-            ctx = _stage_validation_context_for_spec(
-                cur,
-                body=body,
-                goal_query=goal_query,
-                semantic_brief=semantic_brief,
-                path_target_profile=path_target_profile,
-                roadmap_ctx=roadmap_ctx,
-                stage_spec=stage_spec,
-                step_index=step_index,
-                stage_count=stage_count,
-                major=major,
-            )
-            reconciled = _try_reconcile_slot_assignment(
-                cur,
-                assignment=assignments[major_idx],
-                stage_spec=stage_spec,
-                major_step=major,
-                tenant=tenant,
-                progression_graph_id=body.progression_graph_id,
-                stage_match_brief=ctx["stage_match_brief"],
-                stage_goal=ctx["stage_goal"],
-                stage_anti=ctx["stage_anti"],
-                path_primary=ctx["path_primary"],
-                path_tech_excludes=ctx["path_tech_excludes"],
-            )
-            if reconciled:
-                steps.append(reconciled)
-                eid = int(reconciled["exercise_id"])
-                used.add(eid)
-                planned_ids.append(eid)
-                anchor_id = eid
-                anchor_variant_id = reconciled.get("variant_id")
-                continue
             try:
                 slot_priority_id = int(assignments[major_idx].exercise_id)
             except (TypeError, ValueError):
diff --git a/backend/planning_exercise_path_qa.py b/backend/planning_exercise_path_qa.py
index 4cb3c85..d3e50a7 100644
--- a/backend/planning_exercise_path_qa.py
+++ b/backend/planning_exercise_path_qa.py
@@ -435,14 +435,7 @@ def detect_off_topic_steps(
     for idx, step in enumerate(steps):
         if step.get("is_ai_proposal") or step.get("exercise_id") is None:
             continue
-        stage_goal_early = (step.get("roadmap_learning_goal") or "").strip()
         bundle = _load_exercise_text_bundle(cur, int(step["exercise_id"]))
-        from planning_exercise_semantics import exercise_title_equivalent_to_stage_goal
-
-        if stage_goal_early and exercise_title_equivalent_to_stage_goal(
-            bundle["title"], stage_goal_early
-        ):
-            continue
         blob = _blob_from_fields(
             bundle["title"],
             bundle["summary"],
diff --git a/backend/planning_exercise_retrieval.py b/backend/planning_exercise_retrieval.py
index deb0d13..085e9e7 100644
--- a/backend/planning_exercise_retrieval.py
+++ b/backend/planning_exercise_retrieval.py
@@ -431,8 +431,30 @@ def rank_visible_library_hits(
                 step_phase=step_phase,
             )
 
+        rank_stage_sem = stage_semantic_score
+        stage_lg = (stage_learning_goal or "").strip()
+        if roadmap_stage_match and stage_lg:
+            raw_brief = build_stage_match_brief(
+                learning_goal=stage_lg,
+                anti_patterns=pack.get("stage_anti_patterns"),
+                phase=step_phase,
+            )
+            raw_sem, raw_reasons = score_exercise_stage_fit(
+                title=title_s,
+                summary=summary_s,
+                goal=goal_s,
+                variant_names=variants_by_ex.get(eid, []),
+                stage_brief=raw_brief,
+                step_phase=step_phase,
+            )
+            rank_stage_sem = max(stage_semantic_score, raw_sem)
+            if raw_sem > stage_semantic_score and raw_reasons:
+                for rr in raw_reasons:
+                    if rr not in stage_semantic_reasons:
+                        stage_semantic_reasons.append(rr)
+
         effective_semantic = (
-            stage_semantic_score
+            rank_stage_sem
             if roadmap_stage_match and stage_match_brief
             else semantic_score
         )
@@ -461,7 +483,7 @@ def rank_visible_library_hits(
                 summary=summary_s,
                 goal=goal_s,
                 stage_brief=stage_match_brief,
-                stage_semantic_score=stage_semantic_score,
+                stage_semantic_score=rank_stage_sem,
                 anti_patterns=pack.get("stage_anti_patterns"),
                 step_phase=step_phase,
                 path_primary_topic=pack.get("path_primary_topic"),
@@ -528,6 +550,7 @@ def rank_visible_library_hits(
                 "reasons": reasons,
                 "semantic_score": round(semantic_score, 4),
                 "stage_semantic_score": round(stage_semantic_score, 4),
+                "stage_rank_semantic": round(rank_stage_sem, 4),
                 "goal": goal_s,
             }
         )
diff --git a/backend/planning_exercise_semantics.py b/backend/planning_exercise_semantics.py
index ecb9ac9..cfc79d6 100644
--- a/backend/planning_exercise_semantics.py
+++ b/backend/planning_exercise_semantics.py
@@ -954,6 +954,7 @@ def enrich_brief_with_path_constraints(
 
 _MIN_STAGE_FIT_SEMANTIC = 0.30
 _MIN_STAGE_FIT_RELAXED = 0.20
+_MIN_TITLE_EQUIV_SEMANTIC = 0.15
 
 
 def build_stage_match_brief(
@@ -1101,8 +1102,7 @@ def exercise_passes_stage_fit(
     if constraints.exclude_phrases and _blob_matches_stage_excludes(blob, constraints.exclude_phrases):
         return False
 
-    if exercise_title_equivalent_to_stage_goal(title, learning_goal or lg):
-        return True
+    title_equiv = exercise_title_equivalent_to_stage_goal(title, learning_goal or lg)
 
     primary_path = (path_primary_topic or "").strip()
     if not primary_path and lg:
@@ -1114,7 +1114,7 @@ def exercise_passes_stage_fit(
         for item in technique_sibling_excludes(primary_path):
             if item not in tech_excludes:
                 tech_excludes.append(item)
-    if primary_path and not exercise_passes_technique_path_scope(
+    if primary_path and not title_equiv and not exercise_passes_technique_path_scope(
         primary_topic=primary_path,
         title=title,
         summary=summary,
@@ -1139,7 +1139,12 @@ def exercise_passes_stage_fit(
             step_phase=step_phase,
         )
 
-    threshold = _MIN_STAGE_FIT_RELAXED if relaxed else min_stage_semantic
+    if relaxed:
+        threshold = _MIN_STAGE_FIT_RELAXED
+    elif title_equiv:
+        threshold = _MIN_TITLE_EQUIV_SEMANTIC
+    else:
+        threshold = min_stage_semantic
     return float(stage_sem or 0.0) >= threshold
 
 
@@ -1291,7 +1296,11 @@ def pick_best_path_hit(
             summary = str(hit.get("summary") or "")
             goal_text = str(hit.get("goal") or hit.get("exercise_goal") or "")
             sem = float(hit.get("semantic_score") or 0.0)
-            stage_sem = float(hit.get("stage_semantic_score") or sem)
+            stage_sem = float(
+                hit.get("stage_rank_semantic")
+                or hit.get("stage_semantic_score")
+                or sem
+            )
 
             if roadmap_stage_match and stage_goal:
                 if not exercise_passes_stage_fit(
diff --git a/backend/tests/test_planning_roadmap_stage_match.py b/backend/tests/test_planning_roadmap_stage_match.py
index e620c95..5da8869 100644
--- a/backend/tests/test_planning_roadmap_stage_match.py
+++ b/backend/tests/test_planning_roadmap_stage_match.py
@@ -354,18 +354,57 @@ def test_title_equivalent_to_stage_goal():
     assert not exercise_title_equivalent_to_stage_goal("Kumite", "Hüftmobilität für Mae Geri")
 
 
-def test_stage_fit_passes_for_title_equivalent_despite_missing_path_technique():
+def test_stage_fit_passes_for_title_equivalent_with_sufficient_semantic_score():
     stage_goal = "Koordination Absprung ohne Kick"
     assert exercise_passes_stage_fit(
         learning_goal=stage_goal,
         title=stage_goal,
-        summary="",
+        summary="Absprung und Landung koordinieren",
         goal="",
         path_primary_topic="mawashi geri",
         path_technique_excludes=["kumite"],
+        stage_semantic_score=0.42,
     )
 
 
+def test_pick_best_prefers_semantic_fit_over_coincidental_title():
+    stage_goal = "Hüftmobilität für Mawashi Geri"
+    stage_brief = build_stage_match_brief(learning_goal=stage_goal)
+    hits = [
+        {
+            "id": 1,
+            "title": "Hüftmobilität für Mawashi Geri",
+            "summary": "allgemeine Aufwärmung",
+            "goal": "",
+            "score": 0.9,
+            "semantic_score": 0.12,
+            "stage_semantic_score": 0.12,
+            "stage_rank_semantic": 0.35,
+        },
+        {
+            "id": 2,
+            "title": "Mawashi Hüftmobilität und Adduktoren",
+            "summary": "Dehnung Hüfte für Rundtritt",
+            "goal": "Mawashi Geri Hüftbeweglichkeit",
+            "score": 0.72,
+            "semantic_score": 0.58,
+            "stage_semantic_score": 0.58,
+            "stage_rank_semantic": 0.62,
+        },
+    ]
+    chosen = pick_best_path_hit(
+        hits,
+        set(),
+        stage_learning_goal=stage_goal,
+        roadmap_stage_match=True,
+        stage_match_brief=stage_brief,
+        path_primary_topic="mawashi geri",
+        path_technique_excludes=technique_sibling_excludes("mawashi geri"),
+    )
+    assert chosen is not None
+    assert int(chosen["id"]) == 2
+
+
 def test_pick_roadmap_relaxed_with_path_primary_when_strict_fails():
     """Bestehende Graph-Übungen: relaxed Gate auch bei gesetztem path_primary_topic."""
     stage_goal = "Hüftmobilität für Mawashi Geri"