Enhance Stage Matching and Retrieval Logic in Planning Exercise

- Introduced `build_stage_match_brief` to create stage-specific semantic briefs, improving roadmap matching accuracy.
- Updated path retrieval logic to differentiate between general and stage-specific semantic weights, enhancing exercise relevance.
- Added support for anti-patterns and success criteria in stage matching, allowing for more nuanced exercise selection.
- Enhanced tests to validate new stage matching features and ensure correct functionality against learning goals.
- Incremented application version to reflect these updates.
2026-06-10 17:02:21 +02:00 · 2026-06-10 17:02:21 +02:00 · 07e147bc76
commit 07e147bc76
parent 18547613ea
6 changed files with 591 additions and 81 deletions
--- a/backend/planning_exercise_path_builder.py
+++ b/backend/planning_exercise_path_builder.py
@ -32,13 +32,14 @@ from planning_exercise_retrieval import run_multistage_planning_retrieval
 from planning_exercise_semantics import (
    PlanningSemanticBrief,
    apply_path_retrieval_weights,
    apply_stage_match_retrieval_weights,
    brief_to_summary_dict,
    build_semantic_brief,
    build_stage_match_brief,
    enrich_target_with_semantic_expectations,
    exercise_passes_path_semantic_gate,
    pick_best_path_hit,
    resolve_semantic_skill_weights,
    semantic_brief_for_stage,
    step_phase_for_index,
    step_retrieval_query,
    try_enrich_semantic_brief_with_llm,
@ -185,14 +186,18 @@ def _pick_best_path_hit(
    *,
    semantic_brief: Optional[PlanningSemanticBrief] = None,
    stage_learning_goal: Optional[str] = None,
    stage_anti_patterns: Optional[List[str]] = None,
    roadmap_stage_match: bool = False,
    stage_match_brief: Optional[PlanningSemanticBrief] = None,
 ) -> Optional[Dict[str, Any]]:
    return pick_best_path_hit(
        hits,
        used_exercise_ids,
        semantic_brief=semantic_brief,
        stage_learning_goal=stage_learning_goal,
        stage_anti_patterns=stage_anti_patterns,
        roadmap_stage_match=roadmap_stage_match,
        stage_match_brief=stage_match_brief,
    )
@ -292,6 +297,11 @@ def _run_path_step_retrieval(
    step_phase_override: Optional[str] = None,
    step_target_profile_override: Optional[PlanningTargetProfile] = None,
    stage_learning_goal: Optional[str] = None,
    stage_anti_patterns: Optional[List[str]] = None,
    stage_match_brief: Optional[PlanningSemanticBrief] = None,
    stage_success_criteria: Optional[List[str]] = None,
    stage_load_profile: Optional[List[str]] = None,
    path_context_note: Optional[str] = None,
 ) -> Tuple[List[Dict[str, Any]], PlanningTargetProfile, Dict[str, Any], str]:
    step_query = step_query_override or step_retrieval_query(
        semantic_brief, goal_query, step_index, max_steps
@ -328,7 +338,12 @@ def _run_path_step_retrieval(
        "path_step_phase": step_phase_override
        or step_phase_for_index(semantic_brief, step_index, max_steps),
        "stage_learning_goal": (stage_learning_goal or "").strip() or None,
        "stage_anti_patterns": list(stage_anti_patterns or []),
        "roadmap_stage_match": bool((stage_learning_goal or "").strip()),
        "stage_match_brief": stage_match_brief,
        "stage_success_criteria": list(stage_success_criteria or []),
        "stage_load_profile": list(stage_load_profile or []),
        "path_context_note": (path_context_note or "").strip() or None,
    }
    pack = apply_progression_context_to_pack(
        cur,
@ -383,6 +398,9 @@ def _run_path_step_retrieval(
            has_planning_reference=has_plan_ref,
        )
    if pack.get("roadmap_stage_match"):
        weights = apply_stage_match_retrieval_weights(semantic_brief)
    else:
        weights = apply_path_retrieval_weights(semantic_brief)
    profile_id = tenant.profile_id
@ -490,6 +508,8 @@ def _annotate_roadmap_step(
    step["roadmap_major_step_index"] = stage_spec.major_step_index
    step["roadmap_phase"] = major_step.phase if major_step else None
    step["roadmap_learning_goal"] = learning_goal or None
    if stage_spec.anti_patterns:
        step["roadmap_anti_patterns"] = list(stage_spec.anti_patterns)
    step["roadmap_match_source"] = "stage_spec"
    if skill_expectations:
        step["skill_expectations"] = skill_expectations
@ -569,10 +589,22 @@ def _build_steps_roadmap_first(
        )
        step_kind = resolve_step_exercise_kind_filter(stage_spec, body.exercise_kind_any)
        stage_goal = (stage_spec.learning_goal or "").strip()
-        stage_brief = semantic_brief_for_stage(
+        stage_anti = list(stage_spec.anti_patterns or [])
-            semantic_brief,
+        path_context_note = None
        if rs_dump:
            ctx_parts = [
                str(rs_dump.get("start_situation") or "").strip()[:120],
                str(rs_dump.get("target_state") or "").strip()[:120],
                str(rs_dump.get("roadmap_notes") or "").strip()[:120],
            ]
            path_context_note = " ".join(p for p in ctx_parts if p)[:240] or None
        stage_match_brief = build_stage_match_brief(
            learning_goal=stage_goal,
            anti_patterns=stage_anti,
            success_criteria=list(stage_spec.success_criteria or []),
            load_profile=list(stage_spec.load_profile or []),
            phase=major.phase if major else None,
            path_context_note=path_context_note,
        )
        hits, _, _, _ = _run_path_step_retrieval(
@ -587,21 +619,28 @@ def _build_steps_roadmap_first(
            progression_graph_id=body.progression_graph_id,
            include_llm_intent=body.include_llm_intent and step_index == 0,
            exercise_kind_any=step_kind,
-            semantic_brief=stage_brief,
+            semantic_brief=stage_match_brief,
            path_target_profile=path_target_profile,
            path_intent=path_intent,
            step_query_override=step_query,
            step_phase_override=major.phase if major else None,
            step_target_profile_override=step_target,
            stage_learning_goal=stage_goal or None,
            stage_anti_patterns=stage_anti or None,
            stage_match_brief=stage_match_brief,
            stage_success_criteria=list(stage_spec.success_criteria or []),
            stage_load_profile=list(stage_spec.load_profile or []),
            path_context_note=path_context_note,
        )
        hit = _pick_best_path_hit(
            hits,
            used,
-            semantic_brief=stage_brief,
+            semantic_brief=stage_match_brief,
            stage_learning_goal=stage_goal or None,
            stage_anti_patterns=stage_anti or None,
            roadmap_stage_match=True,
            stage_match_brief=stage_match_brief,
        )
        if not hit:
--- a/backend/planning_exercise_path_qa.py
+++ b/backend/planning_exercise_path_qa.py
@ -426,12 +426,14 @@ def detect_off_topic_steps(
            brief=step_brief,
            step_phase=phase,
        )
        stage_anti = list(step.get("roadmap_anti_patterns") or [])
        if stage_goal and not exercise_passes_stage_learning_goal_gate(
            learning_goal=stage_goal,
            title=bundle["title"],
            summary=bundle["summary"],
            goal=bundle["goal"],
            semantic_score=sem,
            anti_patterns=stage_anti or None,
        ):
            off_topic.append(
                {
--- a/backend/planning_exercise_retrieval.py
+++ b/backend/planning_exercise_retrieval.py
@ -14,11 +14,14 @@ from planning_exercise_profiles import (
    load_exercise_match_profiles_bulk,
    score_exercise_against_target,
 )
 from exercise_ai import strip_html_to_plain
 from planning_exercise_semantics import (
    PlanningSemanticBrief,
    build_stage_match_brief,
    exercise_passes_path_semantic_gate,
-    exercise_passes_stage_learning_goal_gate,
+    exercise_passes_stage_fit,
    score_exercise_semantic_relevance,
    score_exercise_stage_fit,
 )
 _MAX_LIBRARY_ROWS = 8000
@ -149,7 +152,7 @@ def _load_exercise_goals_chunked(cur, exercise_ids: Sequence[int], *, batch: int
        ph = ",".join(["%s"] * len(chunk))
        cur.execute(f"SELECT id, goal FROM exercises WHERE id IN ({ph})", chunk)
        for row in cur.fetchall():
-            out[int(row["id"])] = str(row.get("goal") or "")
+            out[int(row["id"])] = strip_html_to_plain(row.get("goal"), max_len=1200)
    return out
@ -203,6 +206,19 @@ def rank_visible_library_hits(
    path_mode = pack.get("context_mode") == "progression_path"
    stage_learning_goal = (pack.get("stage_learning_goal") or "").strip()
    roadmap_stage_match = bool(pack.get("roadmap_stage_match"))
    stage_match_brief_raw = pack.get("stage_match_brief")
    stage_match_brief: Optional[PlanningSemanticBrief] = None
    if isinstance(stage_match_brief_raw, PlanningSemanticBrief):
        stage_match_brief = stage_match_brief_raw
    elif roadmap_stage_match and stage_learning_goal:
        stage_match_brief = build_stage_match_brief(
            learning_goal=stage_learning_goal,
            anti_patterns=pack.get("stage_anti_patterns"),
            success_criteria=pack.get("stage_success_criteria"),
            load_profile=pack.get("stage_load_profile"),
            phase=step_phase,
            path_context_note=pack.get("path_context_note"),
        )
    last_planned_skills: Set[int] = set()
    planned_ids = pack.get("planned_exercise_ids") or []
@ -229,7 +245,11 @@ def rank_visible_library_hits(
    skills_by_ex = _load_skill_sets_chunked(cur, cand_ids)
    goals_by_ex: Dict[int, str] = {}
    variants_by_ex: Dict[int, List[str]] = {}
-    if semantic_brief and semantic_brief.semantic_strength > 0.05:
+    need_exercise_semantic_text = (
        (semantic_brief and semantic_brief.semantic_strength > 0.05)
        or (stage_match_brief and stage_match_brief.semantic_strength > 0.05)
    )
    if need_exercise_semantic_text:
        goals_by_ex = _load_exercise_goals_chunked(cur, cand_ids)
        variants_by_ex = _load_variant_names_chunked(cur, cand_ids)
@ -270,52 +290,75 @@ def rank_visible_library_hits(
                emp, target, intent=intent
            )
        title_s = str(row.get("title") or "")
        summary_s = str(row.get("summary") or "")
        goal_s = goals_by_ex.get(eid, "")
        semantic_score = 0.0
        semantic_reasons: List[str] = []
        if semantic_brief and semantic_brief.semantic_strength > 0.05:
            semantic_score, semantic_reasons = score_exercise_semantic_relevance(
-                title=str(row.get("title") or ""),
+                title=title_s,
-                summary=str(row.get("summary") or ""),
+                summary=summary_s,
-                goal=goals_by_ex.get(eid, ""),
+                goal=goal_s,
                variant_names=variants_by_ex.get(eid, []),
                brief=semantic_brief,
                step_phase=step_phase,
            )
        stage_semantic_score = 0.0
        stage_semantic_reasons: List[str] = []
        if stage_match_brief and stage_match_brief.semantic_strength > 0.05:
            stage_semantic_score, stage_semantic_reasons = score_exercise_stage_fit(
                title=title_s,
                summary=summary_s,
                goal=goal_s,
                variant_names=variants_by_ex.get(eid, []),
                stage_brief=stage_match_brief,
                step_phase=step_phase,
            )
        effective_semantic = (
            stage_semantic_score
            if roadmap_stage_match and stage_match_brief
            else semantic_score
        )
        score_penalty = 0.0
        stage_match_reason: Optional[str] = None
        if (
            path_mode
            and not roadmap_stage_match
            and semantic_brief
            and semantic_brief.semantic_strength >= 0.55
            and not exercise_passes_path_semantic_gate(
                semantic_score=semantic_score,
-                title=str(row.get("title") or ""),
+                title=title_s,
-                summary=str(row.get("summary") or ""),
+                summary=summary_s,
-                goal=goals_by_ex.get(eid, ""),
+                goal=goal_s,
                brief=semantic_brief,
                strict=True,
            )
        ):
            score_penalty = 0.42
        if roadmap_stage_match and stage_learning_goal:
-            title_s = str(row.get("title") or "")
+            if exercise_passes_stage_fit(
            summary_s = str(row.get("summary") or "")
            goal_s = goals_by_ex.get(eid, "")
            if exercise_passes_stage_learning_goal_gate(
                learning_goal=stage_learning_goal,
                title=title_s,
                summary=summary_s,
                goal=goal_s,
-                semantic_score=semantic_score,
+                stage_brief=stage_match_brief,
                stage_semantic_score=stage_semantic_score,
                anti_patterns=pack.get("stage_anti_patterns"),
                step_phase=step_phase,
            ):
-                score_penalty = max(0.0, score_penalty - 0.08)
+                score_penalty = max(0.0, score_penalty - 0.10)
                stage_match_reason = "Passt zum Stufen-Lernziel"
            else:
-                score_penalty += 0.35
+                score_penalty += 0.48
        score = (
-            weights.get("semantic", 0.0) * semantic_score
+            weights.get("semantic", 0.0) * effective_semantic
            + weights["fulltext"] * ft_norm
            + weights["progression"] * prog_hit
            + weights["skill"] * skill_sim
@ -329,7 +372,11 @@ def rank_visible_library_hits(
        reasons: List[str] = []
        if stage_match_reason:
            reasons.append(stage_match_reason)
-        if semantic_score >= 0.35 and semantic_reasons:
+        if roadmap_stage_match and stage_semantic_score >= 0.30 and stage_semantic_reasons:
            for sr in stage_semantic_reasons:
                if sr not in reasons:
                    reasons.append(sr)
        elif semantic_score >= 0.35 and semantic_reasons:
            for sr in semantic_reasons:
                if sr not in reasons:
                    reasons.append(sr)
@ -365,6 +412,8 @@ def rank_visible_library_hits(
                "score": round(max(0.0, min(1.0, score)), 4),
                "reasons": reasons,
                "semantic_score": round(semantic_score, 4),
                "stage_semantic_score": round(stage_semantic_score, 4),
                "goal": goal_s,
            }
        )
        succ_variants = pack.get("progression_successor_variants") or {}
--- a/backend/planning_exercise_semantics.py
+++ b/backend/planning_exercise_semantics.py
@ -9,6 +9,7 @@ from __future__ import annotations
 import json
 import logging
 import re
 from dataclasses import dataclass, field
 from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple
 from pydantic import BaseModel, Field, field_validator
@ -462,7 +463,7 @@ def score_exercise_semantic_relevance(
    core_hits = sum(1 for ph in core if _phrase_in_blob(ph, blob))
    must_hits = sum(1 for ph in must if _phrase_in_blob(ph, blob))
-    exclude_hits = sum(1 for ph in exclude if _phrase_in_blob(ph, blob))
+    exclude_hits = sum(1 for ph in exclude if _phrase_excluded_in_blob(ph, blob))
    score = 0.0
    if core:
@ -623,9 +624,82 @@ _STAGE_GOAL_STOPWORDS = _QUERY_STOPWORDS | frozenset(
 )
-def _significant_stage_tokens(learning_goal: str) -> List[str]:
+_STAGE_NEGATION_PATTERNS = (
-    """Wörter aus Stufen-Lernziel für Text-Match (ohne Füllwörter)."""
+    r"\bohne\s+([^,.;]+)",
-    raw = re.findall(r"[a-zäöüß]{4,}", _normalize_phrase(learning_goal), flags=re.IGNORECASE)
+    r"\bkein(?:e|en|er|em)?\s+([^,.;]+)",
    r"\bnicht\s+([^,.;]+)",
 )
 # Alias table for exclusions such as "ohne Tritttechnik": widens what counts as a
 # hit in the exercise text. `_expand_stage_exclude_phrase` compares each key with
 # the normalized exclusion phrase by substring (either direction) and, on a match,
 # adds every alias of that key to the exclusion list.
 _STAGE_EXCLUDE_ALIASES: Dict[str, Tuple[str, ...]] = {
    "tritttechnik": (
        "tritttechnik",
        "trittpraezision",
        "trittpräzision",
        # Was misspelled "tritt praesision", which could never match any text.
        "tritt praezision",
        "tritt-präzision",
        "kicktechnik",
        "tritt ausführung",
        "tritt ausfuehrung",
    ),
    "kumite": ("kumite", "partnerkampf", "freikampf", "jiyu kumite"),
    "kraftuebung": ("kraftuebung", "kraftübung", "krafttraining", "kraftübungen"),
    "anwendung": ("kumite anwendung", "kampfanwendung"),
 }
 # Focus vocabulary for stage learning goals. `parse_stage_goal_constraints` marks a
 # goal as `strict_positive` when any of its positive tokens is a member of this set.
 _STAGE_FOCUS_TOKENS = frozenset(
    (
        "absprung",
        "athletik",
        "balance",
        "beinhebung",
        "gleichgewicht",
        "koordination",
        "landung",
        "mobilitaet",
        "mobilität",
        "sprung",
        "sprungphase",
        "stabilisation",
        "stabilisierung",
        "timing",
        "vorbereitung",
    )
 )
@dataclass
 class StageGoalConstraints:
    """Positive and negative matching constraints for one roadmap stage learning goal.

    Instances are produced by ``parse_stage_goal_constraints``; the defaults describe
    a goal that yields no constraints at all.
    """
    # Tokens returned by `_significant_stage_tokens` for the goal, negated segments stripped.
    positive_tokens: List[str] = field(default_factory=list)
    # Expanded phrases from the goal's negated segments and from the stage anti-patterns
    # (de-duplicated, capped at 16 by the parser).
    exclude_phrases: List[str] = field(default_factory=list)
    # True when at least one negation pattern matched the learning goal.
    has_negation: bool = False
    # True when the goal contains a focus token or a negation.
    strict_positive: bool = False
 def _expand_stage_exclude_phrase(phrase: str) -> List[str]:
    """Expand one exclusion phrase into the variants to look for in exercise text.

    The result starts with the normalized phrase, followed by its space-free form
    (when that differs) and the normalized aliases of every ``_STAGE_EXCLUDE_ALIASES``
    entry whose key contains the phrase or is contained in it. Order is preserved,
    duplicates are dropped and at most 12 variants are returned.

    Args:
        phrase: Raw exclusion phrase (negated goal segment or anti-pattern).

    Returns:
        The variant list, or an empty list when the phrase normalizes to nothing.
    """
    base = _normalize_phrase(phrase)
    if not base:
        return []

    variants: List[str] = [base]

    def _remember(candidate: str) -> None:
        # Keep first-seen order while skipping empties and repeats.
        if candidate and candidate not in variants:
            variants.append(candidate)

    _remember(base.replace(" ", ""))
    for alias_key, alias_group in _STAGE_EXCLUDE_ALIASES.items():
        if alias_key not in base and base not in alias_key:
            continue
        for alias in alias_group:
            _remember(_normalize_phrase(alias))
    return variants[:12]
 def _significant_stage_tokens(learning_goal: str, *, strip_negated: bool = True) -> List[str]:
    """Wörter aus Stufen-Lernziel für Text-Match (ohne Füllwörter, ohne Negationssegmente)."""
    text = _normalize_phrase(learning_goal)
    if strip_negated:
        for pat in _STAGE_NEGATION_PATTERNS:
            text = re.sub(pat, " ", text)
    raw = re.findall(r"[a-zäöüß]{4,}", text, flags=re.IGNORECASE)
    out: List[str] = []
    for w in raw:
        low = w.lower().replace("ä", "ae").replace("ö", "oe").replace("ü", "ue")
@ -636,19 +710,245 @@ def _significant_stage_tokens(learning_goal: str) -> List[str]:
    return out[:10]
 def parse_stage_goal_constraints(
    learning_goal: str,
    anti_patterns: Optional[Sequence[str]] = None,
 ) -> StageGoalConstraints:
    """Derive positive tokens and exclusions from a stage learning goal and anti-patterns.

    Args:
        learning_goal: Learning goal text of the roadmap stage.
        anti_patterns: Optional extra phrases that must not appear in a matching exercise.

    Returns:
        A ``StageGoalConstraints``. Goals shorter than three characters (after
        stripping) yield an empty default instance. Exclusions come from every
        negated segment of the normalized goal plus the anti-patterns, each expanded
        via ``_expand_stage_exclude_phrase``, de-duplicated in first-seen order and
        capped at 16.
    """
    goal_text = (learning_goal or "").strip()
    if len(goal_text) < 3:
        return StageGoalConstraints()

    normalized_goal = _normalize_phrase(goal_text)
    collected: List[str] = []
    negated = False
    for pattern in _STAGE_NEGATION_PATTERNS:
        for match in re.finditer(pattern, normalized_goal):
            # Any match counts as a negation, even if its captured segment is blank.
            negated = True
            segment = (match.group(1) or "").strip()
            if segment:
                collected.extend(_expand_stage_exclude_phrase(segment))
    for entry in anti_patterns or []:
        cleaned = _normalize_phrase(str(entry or ""))
        if cleaned:
            collected.extend(_expand_stage_exclude_phrase(cleaned))

    tokens = _significant_stage_tokens(goal_text, strip_negated=True)
    mentions_focus = any(tok in _STAGE_FOCUS_TOKENS for tok in tokens)

    # dict.fromkeys keeps the first occurrence of each phrase in insertion order.
    unique_excludes = [item for item in dict.fromkeys(collected) if item]
    return StageGoalConstraints(
        positive_tokens=tokens,
        exclude_phrases=unique_excludes[:16],
        has_negation=negated,
        strict_positive=mentions_focus or negated,
    )
 def _phrase_excluded_in_blob(phrase: str, blob: str) -> bool:
    """Report whether ``blob`` mentions ``phrase`` without negating that topic itself.

    A hit counts only if the excluded topic is not described in negated form: when
    any segment captured by a ``_STAGE_NEGATION_PATTERNS`` match in ``blob`` overlaps
    the normalized phrase, the exercise is treated as not containing the excluded
    topic.

    Args:
        phrase: Exclusion phrase to look for.
        blob: Exercise text; the patterns are applied to it as given
            (presumably already normalized by the caller — TODO confirm).

    Returns:
        True if the phrase occurs and no negated segment overlaps it, else False.
    """
    if not (phrase and blob and _phrase_in_blob(phrase, blob)):
        return False

    needle = _normalize_phrase(phrase)
    negated_chunks = (
        _normalize_phrase(hit.group(1) or "")
        for pattern in _STAGE_NEGATION_PATTERNS
        for hit in re.finditer(pattern, blob)
    )
    return not any(
        chunk and (needle in chunk or chunk in needle or _phrase_in_blob(needle, chunk))
        for chunk in negated_chunks
    )
 def _blob_matches_stage_excludes(blob: str, exclude_phrases: Sequence[str]) -> bool:
    """Return True as soon as one exclusion phrase is a non-negated hit in ``blob``."""
    return any(_phrase_excluded_in_blob(candidate, blob) for candidate in exclude_phrases)
 # Default minimum stage-fit score an exercise needs to pass `exercise_passes_stage_fit`
 # (default value of its `min_stage_semantic` parameter).
 _MIN_STAGE_FIT_SEMANTIC = 0.30
 # Lower threshold used by `exercise_passes_stage_fit` when called with `relaxed=True`.
 _MIN_STAGE_FIT_RELAXED = 0.20
 def build_stage_match_brief(
    *,
    learning_goal: str,
    anti_patterns: Optional[Sequence[str]] = None,
    success_criteria: Optional[Sequence[str]] = None,
    load_profile: Optional[Sequence[str]] = None,
    phase: Optional[str] = None,
    path_context_note: Optional[str] = None,
 ) -> PlanningSemanticBrief:
    """Build a stage-centred semantic brief, independent of the overall path topic.

    Primarily for roadmap matching: exercises are scored against title, summary
    and exercise goal using this brief.

    Args:
        learning_goal: Learning goal of the roadmap stage.
        anti_patterns: Phrases that must not appear; merged into the exclusions.
        success_criteria: Added to the must-phrases, each clipped to 100 characters.
        load_profile: Added to the must-phrases, each clipped to 60 characters.
        phase: Stage phase; stored lower-cased in ``development_arc`` when non-empty.
        path_context_note: Optional path context, normalized and clipped to 200
            characters, appended to the retrieval query only.

    Returns:
        A brief with an empty ``primary_topic`` and ``semantic_strength`` 0.78, or a
        brief with ``semantic_strength`` 0.0 when the goal has fewer than three
        characters. Must-phrases are ordered: positive goal tokens, the full
        normalized goal (clipped to 120), success criteria, load profile — capped at
        12 in total. Exclusions are capped at 12, the retrieval query at 500 characters.
    """
    lg = (learning_goal or "").strip()
    if len(lg) < 3:
        return PlanningSemanticBrief(semantic_strength=0.0)

    constraints = parse_stage_goal_constraints(lg, anti_patterns)
    norm_lg = _normalize_phrase(lg)
    must: List[str] = []

    def _add_must(phrase: str, limit: Optional[int] = None) -> None:
        # Clip BEFORE the duplicate check: comparing the unclipped text let entries
        # that collide after truncation in twice, wasting the 12-slot cap.
        clipped = phrase if limit is None else phrase[:limit]
        if clipped and clipped not in must:
            must.append(clipped)

    for token in constraints.positive_tokens:
        _add_must(token)
    _add_must(norm_lg, 120)
    for raw in success_criteria or []:
        _add_must(_normalize_phrase(str(raw or "")), 100)
    for raw in load_profile or []:
        _add_must(_normalize_phrase(str(raw or "")), 60)

    retrieval_parts = [norm_lg]
    if path_context_note:
        note = _normalize_phrase(path_context_note)[:200]
        if note:
            retrieval_parts.append(note)

    ph = (phase or "").strip().lower()
    arc: List[str] = [ph] if ph else []

    return PlanningSemanticBrief(
        primary_topic="",
        topic_type="focus",
        must_phrases=must[:12],
        exclude_phrases=list(constraints.exclude_phrases)[:12],
        development_arc=arc[:4],
        retrieval_query=" ".join(p for p in retrieval_parts if p)[:500],
        semantic_strength=0.78,
        rationale="stage_match_brief",
    )
 def score_exercise_stage_fit(
    *,
    title: str,
    summary: str,
    goal: str,
    stage_brief: PlanningSemanticBrief,
    variant_names: Optional[Sequence[str]] = None,
    step_phase: Optional[str] = None,
 ) -> Tuple[float, List[str]]:
    """Semantic score of an exercise against a stage learning goal (title + summary + goal).

    Starts from ``score_exercise_semantic_relevance`` for ``stage_brief`` and adds a
    bonus of up to 0.28, proportional to the share of single-word must-phrases
    (at least 4 characters, first 6 only) found in the exercise text.

    Returns:
        ``(score, reasons)`` with the score clamped to [0, 1] and rounded to four
        decimals, and at most four reasons.
    """
    base_score, reasons = score_exercise_semantic_relevance(
        title=title,
        summary=summary,
        goal=goal,
        variant_names=variant_names or [],
        brief=stage_brief,
        step_phase=step_phase,
    )
    text = _blob_from_fields(title, summary, goal, variant_names or [])

    # Single-word must-phrases act as the stage's focus tokens; only the first six count.
    single_words: List[str] = []
    for phrase in stage_brief.must_phrases or []:
        if len(single_words) == 6:
            break
        if phrase and " " not in phrase and len(phrase) >= 4:
            single_words.append(phrase)

    total = base_score
    if single_words:
        matched = len([word for word in single_words if _phrase_in_blob(word, text)])
        bonus = 0.28 * (matched / len(single_words))
        if bonus > 0:
            total = min(1.0, total + bonus)
            # Surface the focus reason first once at least half (minimum one) matched.
            if matched >= max(1, len(single_words) // 2):
                reasons = ["Stufen-Schwerpunkte im Übungstext", *reasons]

    clamped = max(0.0, min(1.0, round(total, 4)))
    return clamped, reasons[:4]
 def exercise_passes_stage_fit(
    *,
    learning_goal: str,
    title: str,
    summary: str = "",
    goal: str = "",
    stage_brief: Optional[PlanningSemanticBrief] = None,
    stage_semantic_score: Optional[float] = None,
    anti_patterns: Optional[Sequence[str]] = None,
    step_phase: Optional[str] = None,
    min_stage_semantic: float = _MIN_STAGE_FIT_SEMANTIC,
    relaxed: bool = False,
 ) -> bool:
    """General stage-fit gate: full exercise text versus the stage brief.

    Args:
        learning_goal: Stage learning goal; goals shorter than three characters
            impose no constraint and always pass.
        title: Exercise title.
        summary: Exercise summary.
        goal: Exercise goal text.
        stage_brief: Precomputed stage brief; built from ``learning_goal`` and
            ``anti_patterns`` only when a score has to be computed here.
        stage_semantic_score: Precomputed stage-fit score; computed via
            ``score_exercise_stage_fit`` when ``None``.
        anti_patterns: Extra exclusion phrases for the stage.
        step_phase: Passed through to the scorer when the score is computed here.
        min_stage_semantic: Threshold in strict mode.
        relaxed: Use the relaxed threshold instead.

    Returns:
        False if the exercise text hits a (non-negated) exclusion phrase or the
        stage-fit score is below the threshold; True otherwise.
    """
    lg = (learning_goal or "").strip()
    if len(lg) < 3:
        return True

    blob = _blob_from_fields(title, summary, goal, [])
    constraints = parse_stage_goal_constraints(lg, anti_patterns)
    if constraints.exclude_phrases and _blob_matches_stage_excludes(
        blob, constraints.exclude_phrases
    ):
        return False

    stage_sem = stage_semantic_score
    if stage_sem is None:
        # Build the fallback brief only when it is actually needed; callers that
        # pass a precomputed score no longer pay for an unused brief.
        brief = (
            stage_brief
            if stage_brief is not None
            else build_stage_match_brief(learning_goal=lg, anti_patterns=anti_patterns)
        )
        stage_sem, _ = score_exercise_stage_fit(
            title=title,
            summary=summary,
            goal=goal,
            stage_brief=brief,
            step_phase=step_phase,
        )

    # Relaxed mode must never be stricter than strict mode: honour a caller-supplied
    # minimum that is already below the relaxed default.
    if relaxed:
        threshold = min(_MIN_STAGE_FIT_RELAXED, min_stage_semantic)
    else:
        threshold = min_stage_semantic
    return float(stage_sem or 0.0) >= threshold
 def apply_stage_match_retrieval_weights(brief: PlanningSemanticBrief) -> Dict[str, float]:
    """Return the ranking weights for roadmap-stage matching.

    Stage semantics (learning goal vs. exercise summary/goal) dominate the mix.
    ``brief`` is accepted but not read; a fresh dict is returned on every call.
    """
    weights: Dict[str, float] = {
        "semantic": 0.58,
        "fulltext": 0.14,
        "profile": 0.18,
        "progression": 0.04,
        "skill": 0.04,
        "plan": 0.02,
    }
    # Negative weights (presumably repeat penalties applied by the ranker — TODO confirm).
    weights.update({"repeat_unit": -0.40, "repeat_group": -0.15})
    return weights
 def semantic_brief_for_stage(
    brief: PlanningSemanticBrief,
    *,
    learning_goal: str,
    phase: Optional[str] = None,
    anti_patterns: Optional[Sequence[str]] = None,
 ) -> PlanningSemanticBrief:
-    """Brief um Stufen-Lernziel erweitern — für Roadmap-Match pro Major Step."""
+    """Legacy: globalen Brief anreichern — bevorzugt build_stage_match_brief für Roadmap-Match."""
    lg = _normalize_phrase(learning_goal)
    if not lg:
        return brief
    constraints = parse_stage_goal_constraints(learning_goal, anti_patterns)
    must = list(brief.must_phrases or [])
    for token in constraints.positive_tokens[:4]:
        if token not in must:
            must.append(token)
    if lg not in must:
        must.insert(0, lg[:120])
    exclude = list(brief.exclude_phrases or [])
    for item in constraints.exclude_phrases:
        if item not in exclude:
            exclude.append(item)
    arc = list(brief.development_arc or [])
    ph = (phase or "").strip().lower()
    if ph and ph not in arc:
@ -657,6 +957,7 @@ def semantic_brief_for_stage(
    return brief.model_copy(
        update={
            "must_phrases": must[:12],
            "exclude_phrases": exclude[:12],
            "development_arc": arc[:8],
            "semantic_strength": min(1.0, strength),
        }
@ -672,33 +973,24 @@ def exercise_passes_stage_learning_goal_gate(
    semantic_score: float = 0.0,
    min_semantic: float = 0.20,
    relaxed: bool = False,
    anti_patterns: Optional[Sequence[str]] = None,
    stage_brief: Optional[PlanningSemanticBrief] = None,
    stage_semantic_score: Optional[float] = None,
    step_phase: Optional[str] = None,
 ) -> bool:
-    """Roadmap-Stufe: Übung muss zum Stufen-Lernziel passen, nicht nur zum Gesamtthema."""
+    """Roadmap-Stufe: delegiert an exercise_passes_stage_fit (Titel + Summary + Goal)."""
-    lg = (learning_goal or "").strip()
+    del semantic_score, min_semantic
-    if len(lg) < 3:
+    return exercise_passes_stage_fit(
-        return True
+        learning_goal=learning_goal,
-
+        title=title,
-    blob = _blob_from_fields(title, summary, goal, [])
+        summary=summary,
-    norm_lg = _normalize_phrase(lg)
+        goal=goal,
-    if _phrase_in_blob(norm_lg, blob):
+        stage_brief=stage_brief,
-        return True
+        stage_semantic_score=stage_semantic_score,
-
+        anti_patterns=anti_patterns,
-    tokens = _significant_stage_tokens(lg)
+        step_phase=step_phase,
-    if not tokens:
+        relaxed=relaxed,
-        threshold = 0.12 if relaxed else min_semantic
+    )
        return semantic_score >= threshold
    hits = sum(1 for t in tokens if _phrase_in_blob(t, blob))
    if len(tokens) <= 2:
        required = 1
    else:
        required = max(2, (len(tokens) + 1) // 2)
    if hits >= required:
        return True
    threshold = 0.14 if relaxed else min_semantic
    return semantic_score >= threshold
 def exercise_passes_path_semantic_gate(
@ -739,7 +1031,9 @@ def pick_best_path_hit(
    *,
    semantic_brief: Optional[PlanningSemanticBrief] = None,
    stage_learning_goal: Optional[str] = None,
    stage_anti_patterns: Optional[Sequence[str]] = None,
    roadmap_stage_match: bool = False,
    stage_match_brief: Optional[PlanningSemanticBrief] = None,
 ) -> Optional[Dict[str, Any]]:
    """Gestufte Auswahl: strikt → relaxed → optional Notfall-Fallback."""
    if not hits:
@ -747,6 +1041,13 @@ def pick_best_path_hit(
    stage_goal = (stage_learning_goal or "").strip()
    stage_brief: Optional[PlanningSemanticBrief] = stage_match_brief
    if roadmap_stage_match and stage_goal and stage_brief is None:
        stage_brief = build_stage_match_brief(
            learning_goal=stage_goal,
            anti_patterns=stage_anti_patterns,
        )
    def _scan(*, strict: bool) -> Optional[Dict[str, Any]]:
        best: Optional[Dict[str, Any]] = None
        best_key: Tuple[float, float] = (-1.0, -1.0)
@ -754,28 +1055,38 @@ def pick_best_path_hit(
            eid = int(hit["id"])
            if eid in used_exercise_ids:
                continue
            sem = float(hit.get("semantic_score") or 0.0)
            title = str(hit.get("title") or "")
            summary = str(hit.get("summary") or "")
            goal_text = str(hit.get("goal") or hit.get("exercise_goal") or "")
            sem = float(hit.get("semantic_score") or 0.0)
            stage_sem = float(hit.get("stage_semantic_score") or sem)
            if roadmap_stage_match and stage_goal:
                if not exercise_passes_stage_fit(
                    learning_goal=stage_goal,
                    title=title,
                    summary=summary,
                    goal=goal_text,
                    stage_brief=stage_brief,
                    stage_semantic_score=stage_sem,
                    anti_patterns=stage_anti_patterns,
                    relaxed=not strict,
                ):
                    continue
            else:
                if semantic_brief and not exercise_passes_path_semantic_gate(
                    semantic_score=sem,
                    title=title,
                    summary=summary,
-                goal="",
+                    goal=goal_text,
                    brief=semantic_brief,
                    strict=strict,
                ):
                    continue
-            if stage_goal and not exercise_passes_stage_learning_goal_gate(
+
                learning_goal=stage_goal,
                title=title,
                summary=summary,
                semantic_score=sem,
                relaxed=not strict,
            ):
                continue
            score = float(hit.get("score") or 0.0)
-            key = (sem, score)
+            rank_sem = stage_sem if roadmap_stage_match and stage_goal else sem
            key = (rank_sem, score)
            if key > best_key:
                best_key = key
                best = hit
@ -820,9 +1131,15 @@ __all__ = [
    "build_semantic_brief",
    "enrich_target_with_semantic_expectations",
    "exercise_passes_path_semantic_gate",
    "StageGoalConstraints",
    "apply_stage_match_retrieval_weights",
    "build_stage_match_brief",
    "exercise_passes_stage_fit",
    "exercise_passes_stage_learning_goal_gate",
    "merge_semantic_brief_llm",
    "parse_stage_goal_constraints",
    "pick_best_path_hit",
    "score_exercise_stage_fit",
    "semantic_brief_for_stage",
    "resolve_semantic_skill_weights",
    "score_exercise_semantic_relevance",
--- a/backend/tests/test_planning_roadmap_stage_match.py
+++ b/backend/tests/test_planning_roadmap_stage_match.py
@ -1,7 +1,9 @@
 """Tests Roadmap-Stufen-Match — Gate gegen themenfremde Übungen."""
 from planning_exercise_semantics import (
    build_stage_match_brief,
    exercise_passes_stage_learning_goal_gate,
    pick_best_path_hit,
    score_exercise_stage_fit,
    semantic_brief_for_stage,
    build_semantic_brief,
 )
@ -35,10 +37,37 @@ def test_semantic_brief_for_stage_adds_learning_goal():
    assert "hüftmobilität und kammerhaltung" in stage.must_phrases[0]
 def test_build_stage_match_brief_uses_stage_tokens_not_global_topic():
    """A brief built only from the stage learning goal holds goal tokens and no primary topic.

    No overall topic is passed in, so "mawashi" must be absent from the joined
    must-phrases and ``primary_topic`` must be empty or whitespace only.
    """
    stage_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
    stage_brief = build_stage_match_brief(learning_goal=stage_goal, phase="vertiefung")
    # Join all must-phrases into one lowercase string so substring checks are case-insensitive.
    phrases = stage_brief.must_phrases or []
    joined_phrases = " ".join(phrases).lower()
    assert "mawashi" not in joined_phrases
    assert "absprung" in joined_phrases
    topic = stage_brief.primary_topic or ""
    assert not topic.strip()
 def test_stage_fit_prefers_goal_over_misleading_title():
    """The exercise whose goal names Absprung/Beinhebung must out-score the kick-precision one.

    Both candidates are scored against the same stage brief; only the first
    element of each ``score_exercise_stage_fit`` result is compared.
    """
    learning_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
    brief = build_stage_match_brief(learning_goal=learning_goal)
    kick_exercise = {
        "title": "Mawashi Geri Trittpräzision",
        "summary": "Kicktechnik",
        "goal": "Präzision im Tritt und Hüftarbeit",
    }
    coordination_exercise = {
        "title": "Allgemeines Sprungtraining",
        "summary": "Athletik",
        "goal": "Absprung, Beinhebung und Landung koordinieren — ohne Trittausführung",
    }
    # Score the kick exercise first, then the coordination exercise, each as a 2-tuple.
    kick_score, _ = score_exercise_stage_fit(stage_brief=brief, **kick_exercise)
    coord_score, _ = score_exercise_stage_fit(stage_brief=brief, **coordination_exercise)
    assert coord_score > kick_score
 def test_pick_best_path_hit_roadmap_stage_no_weak_fallback():
-    brief = build_semantic_brief("Mae Geri Perfektion")
+    stage_brief = build_stage_match_brief(
    stage_brief = semantic_brief_for_stage(
        brief,
        learning_goal="Hüftmobilität für Mae Geri",
        phase="grundlage",
    )
@ -69,9 +98,7 @@ def test_pick_best_path_hit_roadmap_stage_no_weak_fallback():
 def test_pick_best_path_hit_roadmap_stage_picks_relevant():
-    brief = build_semantic_brief("Mae Geri Perfektion")
+    stage_brief = build_stage_match_brief(
    stage_brief = semantic_brief_for_stage(
        brief,
        learning_goal="Hüftmobilität für Mae Geri",
        phase="grundlage",
    )
@ -94,3 +121,63 @@ def test_pick_best_path_hit_roadmap_stage_picks_relevant():
    )
    assert chosen is not None
    assert int(chosen["id"]) == 2
 def test_stage_gate_rejects_tritt_when_goal_says_ohne_tritttechnik():
    """Regression (jumped Mawashi): a kick-precision exercise must fail the 'coordination without kick technique' slot."""
    stage_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
    kick_title = "Verbesserung der Trittpräzision des Mawashi Geri und der Hüftbewegung"
    kick_summary = "Präzision und Hüftarbeit im Stand"
    # The gate must reject this exercise even at a semantic score of 0.72.
    gate_passed = exercise_passes_stage_learning_goal_gate(
        learning_goal=stage_goal,
        title=kick_title,
        summary=kick_summary,
        semantic_score=0.72,
    )
    assert not gate_passed
 def test_stage_gate_accepts_absprung_drill_not_kick_focus():
    """A take-off/leg-lift drill that excludes kicking must pass the stage gate at a semantic score of 0.35."""
    stage_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
    drill_title = "Sprungkoordination — Absprung und Beinhebung"
    drill_summary = "Ohne Trittausführung, Fokus Gleichgewicht und Timing"
    gate_passed = exercise_passes_stage_learning_goal_gate(
        learning_goal=stage_goal,
        title=drill_title,
        summary=drill_summary,
        semantic_score=0.35,
    )
    assert gate_passed
 def test_pick_best_rejects_mawashi_tritt_precision_for_coordination_slot():
    """With roadmap stage matching on, the coordination drill (id 100) must be picked over the kick exercise (id 99).

    The kick-precision hit has the higher ``score`` and ``semantic_score``, so
    choosing id 100 shows the raw scores alone do not decide the pick.
    """
    learning_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
    brief = build_stage_match_brief(learning_goal=learning_goal, phase="vertiefung")
    kick_precision_hit = {
        "id": 99,
        "title": "Verbesserung der Trittpräzision des Mawashi Geri und der Hüftbewegung",
        "summary": "Tritttechnik und Hüfte im Stand",
        "score": 0.91,
        "semantic_score": 0.68,
    }
    coordination_hit = {
        "id": 100,
        "title": "Absprung und Beinhebung — Koordination ohne Kick",
        "summary": "Sprungvorbereitung, kein Tritt",
        "score": 0.62,
        "semantic_score": 0.41,
    }
    # The second positional argument is an empty set (no exercise ids used yet).
    picked = pick_best_path_hit(
        [kick_precision_hit, coordination_hit],
        set(),
        semantic_brief=brief,
        stage_learning_goal=learning_goal,
        roadmap_stage_match=True,
    )
    assert picked is not None
    assert int(picked["id"]) == 100
 def test_parse_stage_goal_constraints_extracts_ohne_tritttechnik():
    """Parsing the 'ohne Tritttechnik' goal must yield a negation, 'absprung' as a positive token, and a 'tritt' exclusion."""
    from planning_exercise_semantics import parse_stage_goal_constraints
    stage_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
    constraints = parse_stage_goal_constraints(stage_goal)
    assert constraints.has_negation
    assert "absprung" in constraints.positive_tokens
    # At least one exclude phrase must contain the substring "tritt".
    assert any("tritt" in phrase for phrase in constraints.exclude_phrases)
--- a/backend/version.py
+++ b/backend/version.py
@ -1,6 +1,6 @@
 # Shinkan Jinkendo Version Information
-APP_VERSION = "0.8.218"
+APP_VERSION = "0.8.220"
 BUILD_DATE = "2026-06-07"
 DB_SCHEMA_VERSION = "20260607088"
@ -53,6 +53,22 @@ MODULE_VERSIONS = {
 }
 CHANGELOG = [
    {
        "version": "0.8.220",
        "date": "2026-06-07",
        "changes": [
            "Roadmap-Stufen-Match: build_stage_match_brief + stage_semantic_score über Titel, Summary und Goal.",
            "Retriever lädt Übungsziele immer bei Stufen-Match; Ranking nach Stufen-Fit statt Gesamtthema.",
        ],
    },
    {
        "version": "0.8.219",
        "date": "2026-06-07",
        "changes": [
            "Roadmap-Stufen-Gate: Negationen (ohne Tritttechnik) + Pflicht-Treffer Absprung/Beinhebung.",
            "anti_patterns in Stufen-Match; Gesamt-Thema allein reicht bei strict_positive nicht mehr.",
        ],
    },
    {
        "version": "0.8.218",
        "date": "2026-06-07",