Enhance Stage Matching and Retrieval Logic in Planning Exercise

- Introduced `build_stage_match_brief` to create stage-specific semantic briefs, improving roadmap matching accuracy.
- Updated path retrieval logic to differentiate between general and stage-specific semantic weights, enhancing exercise relevance.
- Added support for anti-patterns and success criteria in stage matching, allowing for more nuanced exercise selection.
- Enhanced tests to validate the new stage matching features and ensure correct functionality against learning goals.
- Incremented the application version to reflect these updates.
2026-06-10 17:02:21 +02:00 · 2026-06-10 17:02:21 +02:00 · 07e147bc76
commit 07e147bc76
parent 18547613ea
6 changed files with 591 additions and 81 deletions
--- a/backend/planning_exercise_path_builder.py
+++ b/backend/planning_exercise_path_builder.py
@ -32,13 +32,14 @@ from planning_exercise_retrieval import run_multistage_planning_retrieval
 from planning_exercise_semantics import (
    PlanningSemanticBrief,
    apply_path_retrieval_weights,
+    apply_stage_match_retrieval_weights,
    brief_to_summary_dict,
    build_semantic_brief,
+    build_stage_match_brief,
    enrich_target_with_semantic_expectations,
    exercise_passes_path_semantic_gate,
    pick_best_path_hit,
    resolve_semantic_skill_weights,
-    semantic_brief_for_stage,
    step_phase_for_index,
    step_retrieval_query,
    try_enrich_semantic_brief_with_llm,
@ -185,14 +186,18 @@ def _pick_best_path_hit(
    *,
    semantic_brief: Optional[PlanningSemanticBrief] = None,
    stage_learning_goal: Optional[str] = None,
+    stage_anti_patterns: Optional[List[str]] = None,
    roadmap_stage_match: bool = False,
+    stage_match_brief: Optional[PlanningSemanticBrief] = None,
 ) -> Optional[Dict[str, Any]]:
    return pick_best_path_hit(
        hits,
        used_exercise_ids,
        semantic_brief=semantic_brief,
        stage_learning_goal=stage_learning_goal,
+        stage_anti_patterns=stage_anti_patterns,
        roadmap_stage_match=roadmap_stage_match,
+        stage_match_brief=stage_match_brief,
    )


@ -292,6 +297,11 @@ def _run_path_step_retrieval(
    step_phase_override: Optional[str] = None,
    step_target_profile_override: Optional[PlanningTargetProfile] = None,
    stage_learning_goal: Optional[str] = None,
+    stage_anti_patterns: Optional[List[str]] = None,
+    stage_match_brief: Optional[PlanningSemanticBrief] = None,
+    stage_success_criteria: Optional[List[str]] = None,
+    stage_load_profile: Optional[List[str]] = None,
+    path_context_note: Optional[str] = None,
 ) -> Tuple[List[Dict[str, Any]], PlanningTargetProfile, Dict[str, Any], str]:
    step_query = step_query_override or step_retrieval_query(
        semantic_brief, goal_query, step_index, max_steps
@ -328,7 +338,12 @@ def _run_path_step_retrieval(
        "path_step_phase": step_phase_override
        or step_phase_for_index(semantic_brief, step_index, max_steps),
        "stage_learning_goal": (stage_learning_goal or "").strip() or None,
+        "stage_anti_patterns": list(stage_anti_patterns or []),
        "roadmap_stage_match": bool((stage_learning_goal or "").strip()),
+        "stage_match_brief": stage_match_brief,
+        "stage_success_criteria": list(stage_success_criteria or []),
+        "stage_load_profile": list(stage_load_profile or []),
+        "path_context_note": (path_context_note or "").strip() or None,
    }
    pack = apply_progression_context_to_pack(
        cur,
@ -383,6 +398,9 @@ def _run_path_step_retrieval(
            has_planning_reference=has_plan_ref,
        )

+    if pack.get("roadmap_stage_match"):
+        weights = apply_stage_match_retrieval_weights(semantic_brief)
+    else:
        weights = apply_path_retrieval_weights(semantic_brief)

    profile_id = tenant.profile_id
@ -490,6 +508,8 @@ def _annotate_roadmap_step(
    step["roadmap_major_step_index"] = stage_spec.major_step_index
    step["roadmap_phase"] = major_step.phase if major_step else None
    step["roadmap_learning_goal"] = learning_goal or None
+    if stage_spec.anti_patterns:
+        step["roadmap_anti_patterns"] = list(stage_spec.anti_patterns)
    step["roadmap_match_source"] = "stage_spec"
    if skill_expectations:
        step["skill_expectations"] = skill_expectations
@ -569,10 +589,22 @@ def _build_steps_roadmap_first(
        )
        step_kind = resolve_step_exercise_kind_filter(stage_spec, body.exercise_kind_any)
        stage_goal = (stage_spec.learning_goal or "").strip()
-        stage_brief = semantic_brief_for_stage(
-            semantic_brief,
+        stage_anti = list(stage_spec.anti_patterns or [])
+        path_context_note = None
+        if rs_dump:
+            ctx_parts = [
+                str(rs_dump.get("start_situation") or "").strip()[:120],
+                str(rs_dump.get("target_state") or "").strip()[:120],
+                str(rs_dump.get("roadmap_notes") or "").strip()[:120],
+            ]
+            path_context_note = " ".join(p for p in ctx_parts if p)[:240] or None
+        stage_match_brief = build_stage_match_brief(
            learning_goal=stage_goal,
+            anti_patterns=stage_anti,
+            success_criteria=list(stage_spec.success_criteria or []),
+            load_profile=list(stage_spec.load_profile or []),
            phase=major.phase if major else None,
+            path_context_note=path_context_note,
        )

        hits, _, _, _ = _run_path_step_retrieval(
@ -587,21 +619,28 @@ def _build_steps_roadmap_first(
            progression_graph_id=body.progression_graph_id,
            include_llm_intent=body.include_llm_intent and step_index == 0,
            exercise_kind_any=step_kind,
-            semantic_brief=stage_brief,
+            semantic_brief=stage_match_brief,
            path_target_profile=path_target_profile,
            path_intent=path_intent,
            step_query_override=step_query,
            step_phase_override=major.phase if major else None,
            step_target_profile_override=step_target,
            stage_learning_goal=stage_goal or None,
+            stage_anti_patterns=stage_anti or None,
+            stage_match_brief=stage_match_brief,
+            stage_success_criteria=list(stage_spec.success_criteria or []),
+            stage_load_profile=list(stage_spec.load_profile or []),
+            path_context_note=path_context_note,
        )

        hit = _pick_best_path_hit(
            hits,
            used,
-            semantic_brief=stage_brief,
+            semantic_brief=stage_match_brief,
            stage_learning_goal=stage_goal or None,
+            stage_anti_patterns=stage_anti or None,
            roadmap_stage_match=True,
+            stage_match_brief=stage_match_brief,
        )

        if not hit:
--- a/backend/planning_exercise_path_qa.py
+++ b/backend/planning_exercise_path_qa.py
@ -426,12 +426,14 @@ def detect_off_topic_steps(
            brief=step_brief,
            step_phase=phase,
        )
+        stage_anti = list(step.get("roadmap_anti_patterns") or [])
        if stage_goal and not exercise_passes_stage_learning_goal_gate(
            learning_goal=stage_goal,
            title=bundle["title"],
            summary=bundle["summary"],
            goal=bundle["goal"],
            semantic_score=sem,
+            anti_patterns=stage_anti or None,
        ):
            off_topic.append(
                {
--- a/backend/planning_exercise_retrieval.py
+++ b/backend/planning_exercise_retrieval.py
@ -14,11 +14,14 @@ from planning_exercise_profiles import (
    load_exercise_match_profiles_bulk,
    score_exercise_against_target,
 )
+from exercise_ai import strip_html_to_plain
 from planning_exercise_semantics import (
    PlanningSemanticBrief,
+    build_stage_match_brief,
    exercise_passes_path_semantic_gate,
-    exercise_passes_stage_learning_goal_gate,
+    exercise_passes_stage_fit,
    score_exercise_semantic_relevance,
+    score_exercise_stage_fit,
 )

 _MAX_LIBRARY_ROWS = 8000
@ -149,7 +152,7 @@ def _load_exercise_goals_chunked(cur, exercise_ids: Sequence[int], *, batch: int
        ph = ",".join(["%s"] * len(chunk))
        cur.execute(f"SELECT id, goal FROM exercises WHERE id IN ({ph})", chunk)
        for row in cur.fetchall():
-            out[int(row["id"])] = str(row.get("goal") or "")
+            out[int(row["id"])] = strip_html_to_plain(row.get("goal"), max_len=1200)
    return out


@ -203,6 +206,19 @@ def rank_visible_library_hits(
    path_mode = pack.get("context_mode") == "progression_path"
    stage_learning_goal = (pack.get("stage_learning_goal") or "").strip()
    roadmap_stage_match = bool(pack.get("roadmap_stage_match"))
+    stage_match_brief_raw = pack.get("stage_match_brief")
+    stage_match_brief: Optional[PlanningSemanticBrief] = None
+    if isinstance(stage_match_brief_raw, PlanningSemanticBrief):
+        stage_match_brief = stage_match_brief_raw
+    elif roadmap_stage_match and stage_learning_goal:
+        stage_match_brief = build_stage_match_brief(
+            learning_goal=stage_learning_goal,
+            anti_patterns=pack.get("stage_anti_patterns"),
+            success_criteria=pack.get("stage_success_criteria"),
+            load_profile=pack.get("stage_load_profile"),
+            phase=step_phase,
+            path_context_note=pack.get("path_context_note"),
+        )

    last_planned_skills: Set[int] = set()
    planned_ids = pack.get("planned_exercise_ids") or []
@ -229,7 +245,11 @@ def rank_visible_library_hits(
    skills_by_ex = _load_skill_sets_chunked(cur, cand_ids)
    goals_by_ex: Dict[int, str] = {}
    variants_by_ex: Dict[int, List[str]] = {}
-    if semantic_brief and semantic_brief.semantic_strength > 0.05:
+    need_exercise_semantic_text = (
+        (semantic_brief and semantic_brief.semantic_strength > 0.05)
+        or (stage_match_brief and stage_match_brief.semantic_strength > 0.05)
+    )
+    if need_exercise_semantic_text:
        goals_by_ex = _load_exercise_goals_chunked(cur, cand_ids)
        variants_by_ex = _load_variant_names_chunked(cur, cand_ids)

@ -270,52 +290,75 @@ def rank_visible_library_hits(
                emp, target, intent=intent
            )

+        title_s = str(row.get("title") or "")
+        summary_s = str(row.get("summary") or "")
+        goal_s = goals_by_ex.get(eid, "")
+
        semantic_score = 0.0
        semantic_reasons: List[str] = []
        if semantic_brief and semantic_brief.semantic_strength > 0.05:
            semantic_score, semantic_reasons = score_exercise_semantic_relevance(
-                title=str(row.get("title") or ""),
-                summary=str(row.get("summary") or ""),
-                goal=goals_by_ex.get(eid, ""),
+                title=title_s,
+                summary=summary_s,
+                goal=goal_s,
                variant_names=variants_by_ex.get(eid, []),
                brief=semantic_brief,
                step_phase=step_phase,
            )

+        stage_semantic_score = 0.0
+        stage_semantic_reasons: List[str] = []
+        if stage_match_brief and stage_match_brief.semantic_strength > 0.05:
+            stage_semantic_score, stage_semantic_reasons = score_exercise_stage_fit(
+                title=title_s,
+                summary=summary_s,
+                goal=goal_s,
+                variant_names=variants_by_ex.get(eid, []),
+                stage_brief=stage_match_brief,
+                step_phase=step_phase,
+            )
+
+        effective_semantic = (
+            stage_semantic_score
+            if roadmap_stage_match and stage_match_brief
+            else semantic_score
+        )
+
        score_penalty = 0.0
        stage_match_reason: Optional[str] = None
        if (
            path_mode
+            and not roadmap_stage_match
            and semantic_brief
            and semantic_brief.semantic_strength >= 0.55
            and not exercise_passes_path_semantic_gate(
                semantic_score=semantic_score,
-                title=str(row.get("title") or ""),
-                summary=str(row.get("summary") or ""),
-                goal=goals_by_ex.get(eid, ""),
+                title=title_s,
+                summary=summary_s,
+                goal=goal_s,
                brief=semantic_brief,
                strict=True,
            )
        ):
            score_penalty = 0.42
        if roadmap_stage_match and stage_learning_goal:
-            title_s = str(row.get("title") or "")
-            summary_s = str(row.get("summary") or "")
-            goal_s = goals_by_ex.get(eid, "")
-            if exercise_passes_stage_learning_goal_gate(
+            if exercise_passes_stage_fit(
                learning_goal=stage_learning_goal,
                title=title_s,
                summary=summary_s,
                goal=goal_s,
-                semantic_score=semantic_score,
+                stage_brief=stage_match_brief,
+                stage_semantic_score=stage_semantic_score,
+                anti_patterns=pack.get("stage_anti_patterns"),
+                step_phase=step_phase,
            ):
-                score_penalty = max(0.0, score_penalty - 0.08)
+                score_penalty = max(0.0, score_penalty - 0.10)
                stage_match_reason = "Passt zum Stufen-Lernziel"
            else:
-                score_penalty += 0.35
+                score_penalty += 0.48

        score = (
-            weights.get("semantic", 0.0) * semantic_score
+            weights.get("semantic", 0.0) * effective_semantic
            + weights["fulltext"] * ft_norm
            + weights["progression"] * prog_hit
            + weights["skill"] * skill_sim
@ -329,7 +372,11 @@ def rank_visible_library_hits(
        reasons: List[str] = []
        if stage_match_reason:
            reasons.append(stage_match_reason)
-        if semantic_score >= 0.35 and semantic_reasons:
+        if roadmap_stage_match and stage_semantic_score >= 0.30 and stage_semantic_reasons:
+            for sr in stage_semantic_reasons:
+                if sr not in reasons:
+                    reasons.append(sr)
+        elif semantic_score >= 0.35 and semantic_reasons:
            for sr in semantic_reasons:
                if sr not in reasons:
                    reasons.append(sr)
@ -365,6 +412,8 @@ def rank_visible_library_hits(
                "score": round(max(0.0, min(1.0, score)), 4),
                "reasons": reasons,
                "semantic_score": round(semantic_score, 4),
+                "stage_semantic_score": round(stage_semantic_score, 4),
+                "goal": goal_s,
            }
        )
        succ_variants = pack.get("progression_successor_variants") or {}
--- a/backend/planning_exercise_semantics.py
+++ b/backend/planning_exercise_semantics.py
@ -9,6 +9,7 @@ from __future__ import annotations
 import json
 import logging
 import re
+from dataclasses import dataclass, field
 from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple

 from pydantic import BaseModel, Field, field_validator
@ -462,7 +463,7 @@ def score_exercise_semantic_relevance(

    core_hits = sum(1 for ph in core if _phrase_in_blob(ph, blob))
    must_hits = sum(1 for ph in must if _phrase_in_blob(ph, blob))
-    exclude_hits = sum(1 for ph in exclude if _phrase_in_blob(ph, blob))
+    exclude_hits = sum(1 for ph in exclude if _phrase_excluded_in_blob(ph, blob))

    score = 0.0
    if core:
@ -623,9 +624,82 @@ _STAGE_GOAL_STOPWORDS = _QUERY_STOPWORDS | frozenset(
 )


-def _significant_stage_tokens(learning_goal: str) -> List[str]:
-    """Wörter aus Stufen-Lernziel für Text-Match (ohne Füllwörter)."""
-    raw = re.findall(r"[a-zäöüß]{4,}", _normalize_phrase(learning_goal), flags=re.IGNORECASE)
+_STAGE_NEGATION_PATTERNS = (
+    r"\bohne\s+([^,.;]+)",
+    r"\bkein(?:e|en|er|em)?\s+([^,.;]+)",
+    r"\bnicht\s+([^,.;]+)",
+)
+
+# Aus „ohne Tritttechnik“ etc. — erweiterte Treffer im Übungstext
+_STAGE_EXCLUDE_ALIASES: Dict[str, Tuple[str, ...]] = {
+    "tritttechnik": (
+        "tritttechnik",
+        "trittpraezision",
+        "trittpräzision",
+        "tritt praesision",
+        "tritt-präzision",
+        "kicktechnik",
+        "tritt ausführung",
+        "tritt ausfuehrung",
+    ),
+    "kumite": ("kumite", "partnerkampf", "freikampf", "jiyu kumite"),
+    "kraftuebung": ("kraftuebung", "kraftübung", "krafttraining", "kraftübungen"),
+    "anwendung": ("kumite anwendung", "kampfanwendung"),
+}
+
+_STAGE_FOCUS_TOKENS = frozenset(
+    {
+        "koordination",
+        "absprung",
+        "beinhebung",
+        "landung",
+        "sprung",
+        "sprungphase",
+        "balance",
+        "gleichgewicht",
+        "timing",
+        "vorbereitung",
+        "athletik",
+        "mobilitaet",
+        "mobilität",
+        "stabilisation",
+        "stabilisierung",
+    }
+)
+
+
+@dataclass
+class StageGoalConstraints:
+    positive_tokens: List[str] = field(default_factory=list)
+    exclude_phrases: List[str] = field(default_factory=list)
+    has_negation: bool = False
+    strict_positive: bool = False
+
+
+def _expand_stage_exclude_phrase(phrase: str) -> List[str]:
+    norm = _normalize_phrase(phrase)
+    if not norm:
+        return []
+    out: List[str] = [norm]
+    compact = norm.replace(" ", "")
+    if compact and compact not in out:
+        out.append(compact)
+    for key, aliases in _STAGE_EXCLUDE_ALIASES.items():
+        if key in norm or norm in key:
+            for alias in aliases:
+                a = _normalize_phrase(alias)
+                if a and a not in out:
+                    out.append(a)
+    return out[:12]
+
+
+def _significant_stage_tokens(learning_goal: str, *, strip_negated: bool = True) -> List[str]:
+    """Wörter aus Stufen-Lernziel für Text-Match (ohne Füllwörter, ohne Negationssegmente)."""
+    text = _normalize_phrase(learning_goal)
+    if strip_negated:
+        for pat in _STAGE_NEGATION_PATTERNS:
+            text = re.sub(pat, " ", text)
+    raw = re.findall(r"[a-zäöüß]{4,}", text, flags=re.IGNORECASE)
    out: List[str] = []
    for w in raw:
        low = w.lower().replace("ä", "ae").replace("ö", "oe").replace("ü", "ue")
@ -636,19 +710,245 @@ def _significant_stage_tokens(learning_goal: str) -> List[str]:
    return out[:10]


+def parse_stage_goal_constraints(
+    learning_goal: str,
+    anti_patterns: Optional[Sequence[str]] = None,
+) -> StageGoalConstraints:
+    """Positiv/Negativ aus Stufen-Lernziel + anti_patterns (Roadmap-Stufe)."""
+    lg = (learning_goal or "").strip()
+    if len(lg) < 3:
+        return StageGoalConstraints()
+
+    norm = _normalize_phrase(lg)
+    exclude: List[str] = []
+    has_negation = False
+    for pat in _STAGE_NEGATION_PATTERNS:
+        for m in re.finditer(pat, norm):
+            has_negation = True
+            chunk = (m.group(1) or "").strip()
+            if chunk:
+                exclude.extend(_expand_stage_exclude_phrase(chunk))
+
+    for raw in anti_patterns or []:
+        s = _normalize_phrase(str(raw or ""))
+        if s:
+            exclude.extend(_expand_stage_exclude_phrase(s))
+
+    positive = _significant_stage_tokens(lg, strip_negated=True)
+    focus_hits = [t for t in positive if t in _STAGE_FOCUS_TOKENS]
+    strict_positive = bool(focus_hits) or has_negation
+
+    dedup_exclude: List[str] = []
+    for item in exclude:
+        if item and item not in dedup_exclude:
+            dedup_exclude.append(item)
+
+    return StageGoalConstraints(
+        positive_tokens=positive,
+        exclude_phrases=dedup_exclude[:16],
+        has_negation=has_negation,
+        strict_positive=strict_positive,
+    )
+
+
+def _phrase_excluded_in_blob(phrase: str, blob: str) -> bool:
+    """Treffer nur wenn das Ausschluss-Thema nicht selbst negiert beschrieben ist."""
+    if not phrase or not blob:
+        return False
+    if not _phrase_in_blob(phrase, blob):
+        return False
+    norm = _normalize_phrase(phrase)
+    for pat in _STAGE_NEGATION_PATTERNS:
+        for m in re.finditer(pat, blob):
+            chunk = _normalize_phrase(m.group(1) or "")
+            if not chunk:
+                continue
+            if norm in chunk or chunk in norm or _phrase_in_blob(norm, chunk):
+                return False
+    return True
+
+
+def _blob_matches_stage_excludes(blob: str, exclude_phrases: Sequence[str]) -> bool:
+    for phrase in exclude_phrases:
+        if _phrase_excluded_in_blob(phrase, blob):
+            return True
+    return False
+
+
+_MIN_STAGE_FIT_SEMANTIC = 0.30
+_MIN_STAGE_FIT_RELAXED = 0.20
+
+
+def build_stage_match_brief(
+    *,
+    learning_goal: str,
+    anti_patterns: Optional[Sequence[str]] = None,
+    success_criteria: Optional[Sequence[str]] = None,
+    load_profile: Optional[Sequence[str]] = None,
+    phase: Optional[str] = None,
+    path_context_note: Optional[str] = None,
+) -> PlanningSemanticBrief:
+    """
+    Stufen-zentrierter Semantik-Brief — unabhängig vom Gesamt-Pfad-Thema.
+
+    Primär für Roadmap-Match: Bewertung gegen Titel + Kurzbeschreibung + Übungsziel.
+    """
+    lg = (learning_goal or "").strip()
+    if len(lg) < 3:
+        return PlanningSemanticBrief(semantic_strength=0.0)
+
+    constraints = parse_stage_goal_constraints(lg, anti_patterns)
+    must: List[str] = []
+    norm_lg = _normalize_phrase(lg)
+    for token in constraints.positive_tokens:
+        if token not in must:
+            must.append(token)
+    if norm_lg and norm_lg not in must:
+        must.append(norm_lg[:120])
+    for raw in success_criteria or []:
+        s = _normalize_phrase(str(raw or ""))
+        if s and s not in must:
+            must.append(s[:100])
+    for raw in load_profile or []:
+        s = _normalize_phrase(str(raw or ""))
+        if s and s not in must:
+            must.append(s[:60])
+
+    retrieval_parts = [norm_lg]
+    if path_context_note:
+        note = _normalize_phrase(path_context_note)[:200]
+        if note:
+            retrieval_parts.append(note)
+
+    arc: List[str] = []
+    ph = (phase or "").strip().lower()
+    if ph:
+        arc.append(ph)
+
+    return PlanningSemanticBrief(
+        primary_topic="",
+        topic_type="focus",
+        must_phrases=must[:12],
+        exclude_phrases=list(constraints.exclude_phrases)[:12],
+        development_arc=arc[:4],
+        retrieval_query=" ".join(p for p in retrieval_parts if p)[:500],
+        semantic_strength=0.78,
+        rationale="stage_match_brief",
+    )
+
+
+def score_exercise_stage_fit(
+    *,
+    title: str,
+    summary: str,
+    goal: str,
+    stage_brief: PlanningSemanticBrief,
+    variant_names: Optional[Sequence[str]] = None,
+    step_phase: Optional[str] = None,
+) -> Tuple[float, List[str]]:
+    """Semantik-Score Übung ↔ Stufen-Lernziel (Titel + Summary + Goal)."""
+    score, reasons = score_exercise_semantic_relevance(
+        title=title,
+        summary=summary,
+        goal=goal,
+        variant_names=variant_names or [],
+        brief=stage_brief,
+        step_phase=step_phase,
+    )
+    blob = _blob_from_fields(title, summary, goal, variant_names or [])
+    focus_tokens = [
+        t
+        for t in (stage_brief.must_phrases or [])
+        if t and " " not in t and len(t) >= 4
+    ][:6]
+    if focus_tokens:
+        hits = sum(1 for t in focus_tokens if _phrase_in_blob(t, blob))
+        ratio = hits / len(focus_tokens)
+        bonus = 0.28 * ratio
+        if bonus > 0:
+            score = min(1.0, score + bonus)
+            if hits >= max(1, len(focus_tokens) // 2):
+                reasons = ["Stufen-Schwerpunkte im Übungstext", *reasons]
+    return max(0.0, min(1.0, round(score, 4))), reasons[:4]
+
+
+def exercise_passes_stage_fit(
+    *,
+    learning_goal: str,
+    title: str,
+    summary: str = "",
+    goal: str = "",
+    stage_brief: Optional[PlanningSemanticBrief] = None,
+    stage_semantic_score: Optional[float] = None,
+    anti_patterns: Optional[Sequence[str]] = None,
+    step_phase: Optional[str] = None,
+    min_stage_semantic: float = _MIN_STAGE_FIT_SEMANTIC,
+    relaxed: bool = False,
+) -> bool:
+    """Allgemeines Stufen-Fit-Gate: voller Übungstext vs. Stufen-Brief."""
+    lg = (learning_goal or "").strip()
+    if len(lg) < 3:
+        return True
+
+    blob = _blob_from_fields(title, summary, goal, [])
+    constraints = parse_stage_goal_constraints(lg, anti_patterns)
+    if constraints.exclude_phrases and _blob_matches_stage_excludes(blob, constraints.exclude_phrases):
+        return False
+
+    brief = stage_brief or build_stage_match_brief(
+        learning_goal=lg,
+        anti_patterns=anti_patterns,
+    )
+    stage_sem = stage_semantic_score
+    if stage_sem is None:
+        stage_sem, _ = score_exercise_stage_fit(
+            title=title,
+            summary=summary,
+            goal=goal,
+            stage_brief=brief,
+            step_phase=step_phase,
+        )
+
+    threshold = _MIN_STAGE_FIT_RELAXED if relaxed else min_stage_semantic
+    return float(stage_sem or 0.0) >= threshold
+
+
+def apply_stage_match_retrieval_weights(brief: PlanningSemanticBrief) -> Dict[str, float]:
+    """Roadmap-Stufe: Stufen-Semantik (Ziel/Summary/Goal) dominiert."""
+    return {
+        "semantic": 0.58,
+        "fulltext": 0.14,
+        "profile": 0.18,
+        "progression": 0.04,
+        "skill": 0.04,
+        "plan": 0.02,
+        "repeat_unit": -0.40,
+        "repeat_group": -0.15,
+    }
+
+
 def semantic_brief_for_stage(
    brief: PlanningSemanticBrief,
    *,
    learning_goal: str,
    phase: Optional[str] = None,
+    anti_patterns: Optional[Sequence[str]] = None,
 ) -> PlanningSemanticBrief:
-    """Brief um Stufen-Lernziel erweitern — für Roadmap-Match pro Major Step."""
+    """Legacy: globalen Brief anreichern — bevorzugt build_stage_match_brief für Roadmap-Match."""
    lg = _normalize_phrase(learning_goal)
    if not lg:
        return brief
+    constraints = parse_stage_goal_constraints(learning_goal, anti_patterns)
    must = list(brief.must_phrases or [])
+    for token in constraints.positive_tokens[:4]:
+        if token not in must:
+            must.append(token)
    if lg not in must:
        must.insert(0, lg[:120])
+    exclude = list(brief.exclude_phrases or [])
+    for item in constraints.exclude_phrases:
+        if item not in exclude:
+            exclude.append(item)
    arc = list(brief.development_arc or [])
    ph = (phase or "").strip().lower()
    if ph and ph not in arc:
@ -657,6 +957,7 @@ def semantic_brief_for_stage(
    return brief.model_copy(
        update={
            "must_phrases": must[:12],
+            "exclude_phrases": exclude[:12],
            "development_arc": arc[:8],
            "semantic_strength": min(1.0, strength),
        }
@ -672,33 +973,24 @@ def exercise_passes_stage_learning_goal_gate(
    semantic_score: float = 0.0,
    min_semantic: float = 0.20,
    relaxed: bool = False,
+    anti_patterns: Optional[Sequence[str]] = None,
+    stage_brief: Optional[PlanningSemanticBrief] = None,
+    stage_semantic_score: Optional[float] = None,
+    step_phase: Optional[str] = None,
 ) -> bool:
-    """Roadmap-Stufe: Übung muss zum Stufen-Lernziel passen, nicht nur zum Gesamtthema."""
-    lg = (learning_goal or "").strip()
-    if len(lg) < 3:
-        return True
-
-    blob = _blob_from_fields(title, summary, goal, [])
-    norm_lg = _normalize_phrase(lg)
-    if _phrase_in_blob(norm_lg, blob):
-        return True
-
-    tokens = _significant_stage_tokens(lg)
-    if not tokens:
-        threshold = 0.12 if relaxed else min_semantic
-        return semantic_score >= threshold
-
-    hits = sum(1 for t in tokens if _phrase_in_blob(t, blob))
-    if len(tokens) <= 2:
-        required = 1
-    else:
-        required = max(2, (len(tokens) + 1) // 2)
-
-    if hits >= required:
-        return True
-
-    threshold = 0.14 if relaxed else min_semantic
-    return semantic_score >= threshold
+    """Roadmap-Stufe: delegiert an exercise_passes_stage_fit (Titel + Summary + Goal)."""
+    del semantic_score, min_semantic
+    return exercise_passes_stage_fit(
+        learning_goal=learning_goal,
+        title=title,
+        summary=summary,
+        goal=goal,
+        stage_brief=stage_brief,
+        stage_semantic_score=stage_semantic_score,
+        anti_patterns=anti_patterns,
+        step_phase=step_phase,
+        relaxed=relaxed,
+    )


 def exercise_passes_path_semantic_gate(
@ -739,7 +1031,9 @@ def pick_best_path_hit(
    *,
    semantic_brief: Optional[PlanningSemanticBrief] = None,
    stage_learning_goal: Optional[str] = None,
+    stage_anti_patterns: Optional[Sequence[str]] = None,
    roadmap_stage_match: bool = False,
+    stage_match_brief: Optional[PlanningSemanticBrief] = None,
 ) -> Optional[Dict[str, Any]]:
    """Gestufte Auswahl: strikt → relaxed → optional Notfall-Fallback."""
    if not hits:
@ -747,6 +1041,13 @@ def pick_best_path_hit(

    stage_goal = (stage_learning_goal or "").strip()

+    stage_brief: Optional[PlanningSemanticBrief] = stage_match_brief
+    if roadmap_stage_match and stage_goal and stage_brief is None:
+        stage_brief = build_stage_match_brief(
+            learning_goal=stage_goal,
+            anti_patterns=stage_anti_patterns,
+        )
+
    def _scan(*, strict: bool) -> Optional[Dict[str, Any]]:
        best: Optional[Dict[str, Any]] = None
        best_key: Tuple[float, float] = (-1.0, -1.0)
@ -754,28 +1055,38 @@ def pick_best_path_hit(
            eid = int(hit["id"])
            if eid in used_exercise_ids:
                continue
-            sem = float(hit.get("semantic_score") or 0.0)
            title = str(hit.get("title") or "")
            summary = str(hit.get("summary") or "")
+            goal_text = str(hit.get("goal") or hit.get("exercise_goal") or "")
+            sem = float(hit.get("semantic_score") or 0.0)
+            stage_sem = float(hit.get("stage_semantic_score") or sem)
+
+            if roadmap_stage_match and stage_goal:
+                if not exercise_passes_stage_fit(
+                    learning_goal=stage_goal,
+                    title=title,
+                    summary=summary,
+                    goal=goal_text,
+                    stage_brief=stage_brief,
+                    stage_semantic_score=stage_sem,
+                    anti_patterns=stage_anti_patterns,
+                    relaxed=not strict,
+                ):
+                    continue
+            else:
                if semantic_brief and not exercise_passes_path_semantic_gate(
                    semantic_score=sem,
                    title=title,
                    summary=summary,
-                goal="",
+                    goal=goal_text,
                    brief=semantic_brief,
                    strict=strict,
                ):
                    continue
-            if stage_goal and not exercise_passes_stage_learning_goal_gate(
-                learning_goal=stage_goal,
-                title=title,
-                summary=summary,
-                semantic_score=sem,
-                relaxed=not strict,
-            ):
-                continue
+
            score = float(hit.get("score") or 0.0)
-            key = (sem, score)
+            rank_sem = stage_sem if roadmap_stage_match and stage_goal else sem
+            key = (rank_sem, score)
            if key > best_key:
                best_key = key
                best = hit
@ -820,9 +1131,15 @@ __all__ = [
    "build_semantic_brief",
    "enrich_target_with_semantic_expectations",
    "exercise_passes_path_semantic_gate",
+    "StageGoalConstraints",
+    "apply_stage_match_retrieval_weights",
+    "build_stage_match_brief",
+    "exercise_passes_stage_fit",
    "exercise_passes_stage_learning_goal_gate",
    "merge_semantic_brief_llm",
+    "parse_stage_goal_constraints",
    "pick_best_path_hit",
+    "score_exercise_stage_fit",
    "semantic_brief_for_stage",
    "resolve_semantic_skill_weights",
    "score_exercise_semantic_relevance",
--- a/backend/tests/test_planning_roadmap_stage_match.py
+++ b/backend/tests/test_planning_roadmap_stage_match.py
@ -1,7 +1,9 @@
 """Tests Roadmap-Stufen-Match — Gate gegen themenfremde Übungen."""
 from planning_exercise_semantics import (
+    build_stage_match_brief,
    exercise_passes_stage_learning_goal_gate,
    pick_best_path_hit,
+    score_exercise_stage_fit,
    semantic_brief_for_stage,
    build_semantic_brief,
 )
@ -35,10 +37,37 @@ def test_semantic_brief_for_stage_adds_learning_goal():
    assert "hüftmobilität und kammerhaltung" in stage.must_phrases[0]


+def test_build_stage_match_brief_uses_stage_tokens_not_global_topic():
+    """A brief built from a stage goal alone carries that goal's tokens and no topic.
+
+    The joined must-phrases have to mention "absprung" (a word of the
+    learning goal), must not mention "mawashi" (absent from the learning
+    goal), and ``primary_topic`` has to be falsy or whitespace-only.
+    """
+    stage_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
+    stage_brief = build_stage_match_brief(learning_goal=stage_goal, phase="vertiefung")
+    required_text = " ".join(stage_brief.must_phrases or []).lower()
+    assert "mawashi" not in required_text
+    assert "absprung" in required_text
+    assert not (stage_brief.primary_topic or "").strip()
+
+
+def test_stage_fit_prefers_goal_over_misleading_title():
+    """The stage-fit score must rank a goal-matching exercise above a kick drill.
+
+    Both exercises are scored against the same stage brief. The exercise
+    whose ``goal`` text names Absprung/Beinhebung has to score strictly
+    higher than the one whose title, summary and goal are about kicking.
+    """
+    brief = build_stage_match_brief(
+        learning_goal="Koordination von Absprung und Beinhebung ohne Tritttechnik"
+    )
+    kick_exercise = {
+        "title": "Mawashi Geri Trittpräzision",
+        "summary": "Kicktechnik",
+        "goal": "Präzision im Tritt und Hüftarbeit",
+    }
+    coord_exercise = {
+        "title": "Allgemeines Sprungtraining",
+        "summary": "Athletik",
+        "goal": "Absprung, Beinhebung und Landung koordinieren — ohne Trittausführung",
+    }
+    # Only the first element of the returned pair (the score) is compared.
+    kick_fit, _ = score_exercise_stage_fit(stage_brief=brief, **kick_exercise)
+    coord_fit, _ = score_exercise_stage_fit(stage_brief=brief, **coord_exercise)
+    assert coord_fit > kick_fit
+
+
 def test_pick_best_path_hit_roadmap_stage_no_weak_fallback():
-    brief = build_semantic_brief("Mae Geri Perfektion")
-    stage_brief = semantic_brief_for_stage(
-        brief,
+    stage_brief = build_stage_match_brief(
        learning_goal="Hüftmobilität für Mae Geri",
        phase="grundlage",
    )
@ -69,9 +98,7 @@ def test_pick_best_path_hit_roadmap_stage_no_weak_fallback():


 def test_pick_best_path_hit_roadmap_stage_picks_relevant():
-    brief = build_semantic_brief("Mae Geri Perfektion")
-    stage_brief = semantic_brief_for_stage(
-        brief,
+    stage_brief = build_stage_match_brief(
        learning_goal="Hüftmobilität für Mae Geri",
        phase="grundlage",
    )
@ -94,3 +121,63 @@ def test_pick_best_path_hit_roadmap_stage_picks_relevant():
    )
    assert chosen is not None
    assert int(chosen["id"]) == 2
+
+
+def test_stage_gate_rejects_tritt_when_goal_says_ohne_tritttechnik():
+    """Regression: jumping Mawashi — coordination slot without kick technique.
+
+    A kick-precision exercise must fail the stage learning-goal gate when
+    the goal says "ohne Tritttechnik", even with a semantic score of 0.72.
+    """
+    passed = exercise_passes_stage_learning_goal_gate(
+        learning_goal="Koordination von Absprung und Beinhebung ohne Tritttechnik",
+        title="Verbesserung der Trittpräzision des Mawashi Geri und der Hüftbewegung",
+        summary="Präzision und Hüftarbeit im Stand",
+        semantic_score=0.72,
+    )
+    assert not passed
+
+
+def test_stage_gate_accepts_absprung_drill_not_kick_focus():
+    """A take-off/leg-lift drill passes the stage gate despite a low semantic score.
+
+    The exercise names Absprung and Beinhebung and states that no kick is
+    executed; it has to pass the gate with a semantic score of only 0.35.
+    """
+    passed = exercise_passes_stage_learning_goal_gate(
+        learning_goal="Koordination von Absprung und Beinhebung ohne Tritttechnik",
+        title="Sprungkoordination — Absprung und Beinhebung",
+        summary="Ohne Trittausführung, Fokus Gleichgewicht und Timing",
+        semantic_score=0.35,
+    )
+    assert passed
+
+
+def test_pick_best_rejects_mawashi_tritt_precision_for_coordination_slot():
+    """The picker must choose the coordination drill over a higher-scored kick drill.
+
+    The kick-precision hit (id 99) has the higher ``score`` and
+    ``semantic_score``; with roadmap stage matching enabled for a goal that
+    says "ohne Tritttechnik", the coordination hit (id 100) has to be returned.
+    """
+    goal_text = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
+    brief = build_stage_match_brief(learning_goal=goal_text, phase="vertiefung")
+    kick_hit = {
+        "id": 99,
+        "title": "Verbesserung der Trittpräzision des Mawashi Geri und der Hüftbewegung",
+        "summary": "Tritttechnik und Hüfte im Stand",
+        "score": 0.91,
+        "semantic_score": 0.68,
+    }
+    coordination_hit = {
+        "id": 100,
+        "title": "Absprung und Beinhebung — Koordination ohne Kick",
+        "summary": "Sprungvorbereitung, kein Tritt",
+        "score": 0.62,
+        "semantic_score": 0.41,
+    }
+    # An empty set as second argument: no exercise ids are marked as used.
+    picked = pick_best_path_hit(
+        [kick_hit, coordination_hit],
+        set(),
+        semantic_brief=brief,
+        stage_learning_goal=goal_text,
+        roadmap_stage_match=True,
+    )
+    assert picked is not None
+    assert int(picked["id"]) == 100
+
+
+def test_parse_stage_goal_constraints_extracts_ohne_tritttechnik():
+    """Parsing a goal containing "ohne Tritttechnik" yields a negation and an exclusion.
+
+    The parsed constraints must flag a negation, list "absprung" among the
+    positive tokens, and hold at least one exclude phrase containing "tritt".
+    """
+    from planning_exercise_semantics import parse_stage_goal_constraints
+
+    goal_text = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
+    constraints = parse_stage_goal_constraints(goal_text)
+    assert constraints.has_negation
+    assert "absprung" in constraints.positive_tokens
+    assert any("tritt" in phrase for phrase in constraints.exclude_phrases)
--- a/backend/version.py
+++ b/backend/version.py
@ -1,6 +1,6 @@
 # Shinkan Jinkendo Version Information

-APP_VERSION = "0.8.218"
+APP_VERSION = "0.8.220"
 BUILD_DATE = "2026-06-07"
 DB_SCHEMA_VERSION = "20260607088"

@ -53,6 +53,22 @@ MODULE_VERSIONS = {
 }

 CHANGELOG = [
+    {
+        "version": "0.8.220",
+        "date": "2026-06-07",
+        "changes": [
+            "Roadmap-Stufen-Match: build_stage_match_brief + stage_semantic_score über Titel, Summary und Goal.",
+            "Retriever lädt Übungsziele immer bei Stufen-Match; Ranking nach Stufen-Fit statt Gesamtthema.",
+        ],
+    },
+    {
+        "version": "0.8.219",
+        "date": "2026-06-07",
+        "changes": [
+            "Roadmap-Stufen-Gate: Negationen (ohne Tritttechnik) + Pflicht-Treffer Absprung/Beinhebung.",
+            "anti_patterns in Stufen-Match; Gesamt-Thema allein reicht bei strict_positive nicht mehr.",
+        ],
+    },
    {
        "version": "0.8.218",
        "date": "2026-06-07",