Enhance Stage Matching and Retrieval Logic in Planning Exercise
All checks were successful
Deploy Development / deploy (push) Successful in 43s
Test Suite / pytest-backend (push) Successful in 44s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 13s
Test Suite / k6 /health Baseline (push) Successful in 34s
Test Suite / playwright-tests (push) Successful in 1m16s

- Introduced `build_stage_match_brief` to create stage-specific semantic briefs, improving roadmap matching accuracy.
- Updated path retrieval logic to differentiate between general and stage-specific semantic weights, enhancing exercise relevance.
- Added support for anti-patterns and success criteria in stage matching, allowing for more nuanced exercise selection.
- Enhanced tests to validate new stage matching features and ensure correct functionality against learning goals.
- Incremented application version to reflect these updates.
This commit is contained in:
Lars 2026-06-10 17:02:21 +02:00
parent 18547613ea
commit 07e147bc76
6 changed files with 591 additions and 81 deletions

View File

@ -32,13 +32,14 @@ from planning_exercise_retrieval import run_multistage_planning_retrieval
from planning_exercise_semantics import (
PlanningSemanticBrief,
apply_path_retrieval_weights,
apply_stage_match_retrieval_weights,
brief_to_summary_dict,
build_semantic_brief,
build_stage_match_brief,
enrich_target_with_semantic_expectations,
exercise_passes_path_semantic_gate,
pick_best_path_hit,
resolve_semantic_skill_weights,
semantic_brief_for_stage,
step_phase_for_index,
step_retrieval_query,
try_enrich_semantic_brief_with_llm,
@ -185,14 +186,18 @@ def _pick_best_path_hit(
*,
semantic_brief: Optional[PlanningSemanticBrief] = None,
stage_learning_goal: Optional[str] = None,
stage_anti_patterns: Optional[List[str]] = None,
roadmap_stage_match: bool = False,
stage_match_brief: Optional[PlanningSemanticBrief] = None,
) -> Optional[Dict[str, Any]]:
return pick_best_path_hit(
hits,
used_exercise_ids,
semantic_brief=semantic_brief,
stage_learning_goal=stage_learning_goal,
stage_anti_patterns=stage_anti_patterns,
roadmap_stage_match=roadmap_stage_match,
stage_match_brief=stage_match_brief,
)
@ -292,6 +297,11 @@ def _run_path_step_retrieval(
step_phase_override: Optional[str] = None,
step_target_profile_override: Optional[PlanningTargetProfile] = None,
stage_learning_goal: Optional[str] = None,
stage_anti_patterns: Optional[List[str]] = None,
stage_match_brief: Optional[PlanningSemanticBrief] = None,
stage_success_criteria: Optional[List[str]] = None,
stage_load_profile: Optional[List[str]] = None,
path_context_note: Optional[str] = None,
) -> Tuple[List[Dict[str, Any]], PlanningTargetProfile, Dict[str, Any], str]:
step_query = step_query_override or step_retrieval_query(
semantic_brief, goal_query, step_index, max_steps
@ -328,7 +338,12 @@ def _run_path_step_retrieval(
"path_step_phase": step_phase_override
or step_phase_for_index(semantic_brief, step_index, max_steps),
"stage_learning_goal": (stage_learning_goal or "").strip() or None,
"stage_anti_patterns": list(stage_anti_patterns or []),
"roadmap_stage_match": bool((stage_learning_goal or "").strip()),
"stage_match_brief": stage_match_brief,
"stage_success_criteria": list(stage_success_criteria or []),
"stage_load_profile": list(stage_load_profile or []),
"path_context_note": (path_context_note or "").strip() or None,
}
pack = apply_progression_context_to_pack(
cur,
@ -383,6 +398,9 @@ def _run_path_step_retrieval(
has_planning_reference=has_plan_ref,
)
if pack.get("roadmap_stage_match"):
weights = apply_stage_match_retrieval_weights(semantic_brief)
else:
weights = apply_path_retrieval_weights(semantic_brief)
profile_id = tenant.profile_id
@ -490,6 +508,8 @@ def _annotate_roadmap_step(
step["roadmap_major_step_index"] = stage_spec.major_step_index
step["roadmap_phase"] = major_step.phase if major_step else None
step["roadmap_learning_goal"] = learning_goal or None
if stage_spec.anti_patterns:
step["roadmap_anti_patterns"] = list(stage_spec.anti_patterns)
step["roadmap_match_source"] = "stage_spec"
if skill_expectations:
step["skill_expectations"] = skill_expectations
@ -569,10 +589,22 @@ def _build_steps_roadmap_first(
)
step_kind = resolve_step_exercise_kind_filter(stage_spec, body.exercise_kind_any)
stage_goal = (stage_spec.learning_goal or "").strip()
stage_brief = semantic_brief_for_stage(
semantic_brief,
stage_anti = list(stage_spec.anti_patterns or [])
path_context_note = None
if rs_dump:
ctx_parts = [
str(rs_dump.get("start_situation") or "").strip()[:120],
str(rs_dump.get("target_state") or "").strip()[:120],
str(rs_dump.get("roadmap_notes") or "").strip()[:120],
]
path_context_note = " ".join(p for p in ctx_parts if p)[:240] or None
stage_match_brief = build_stage_match_brief(
learning_goal=stage_goal,
anti_patterns=stage_anti,
success_criteria=list(stage_spec.success_criteria or []),
load_profile=list(stage_spec.load_profile or []),
phase=major.phase if major else None,
path_context_note=path_context_note,
)
hits, _, _, _ = _run_path_step_retrieval(
@ -587,21 +619,28 @@ def _build_steps_roadmap_first(
progression_graph_id=body.progression_graph_id,
include_llm_intent=body.include_llm_intent and step_index == 0,
exercise_kind_any=step_kind,
semantic_brief=stage_brief,
semantic_brief=stage_match_brief,
path_target_profile=path_target_profile,
path_intent=path_intent,
step_query_override=step_query,
step_phase_override=major.phase if major else None,
step_target_profile_override=step_target,
stage_learning_goal=stage_goal or None,
stage_anti_patterns=stage_anti or None,
stage_match_brief=stage_match_brief,
stage_success_criteria=list(stage_spec.success_criteria or []),
stage_load_profile=list(stage_spec.load_profile or []),
path_context_note=path_context_note,
)
hit = _pick_best_path_hit(
hits,
used,
semantic_brief=stage_brief,
semantic_brief=stage_match_brief,
stage_learning_goal=stage_goal or None,
stage_anti_patterns=stage_anti or None,
roadmap_stage_match=True,
stage_match_brief=stage_match_brief,
)
if not hit:

View File

@ -426,12 +426,14 @@ def detect_off_topic_steps(
brief=step_brief,
step_phase=phase,
)
stage_anti = list(step.get("roadmap_anti_patterns") or [])
if stage_goal and not exercise_passes_stage_learning_goal_gate(
learning_goal=stage_goal,
title=bundle["title"],
summary=bundle["summary"],
goal=bundle["goal"],
semantic_score=sem,
anti_patterns=stage_anti or None,
):
off_topic.append(
{

View File

@ -14,11 +14,14 @@ from planning_exercise_profiles import (
load_exercise_match_profiles_bulk,
score_exercise_against_target,
)
from exercise_ai import strip_html_to_plain
from planning_exercise_semantics import (
PlanningSemanticBrief,
build_stage_match_brief,
exercise_passes_path_semantic_gate,
exercise_passes_stage_learning_goal_gate,
exercise_passes_stage_fit,
score_exercise_semantic_relevance,
score_exercise_stage_fit,
)
_MAX_LIBRARY_ROWS = 8000
@ -149,7 +152,7 @@ def _load_exercise_goals_chunked(cur, exercise_ids: Sequence[int], *, batch: int
ph = ",".join(["%s"] * len(chunk))
cur.execute(f"SELECT id, goal FROM exercises WHERE id IN ({ph})", chunk)
for row in cur.fetchall():
out[int(row["id"])] = str(row.get("goal") or "")
out[int(row["id"])] = strip_html_to_plain(row.get("goal"), max_len=1200)
return out
@ -203,6 +206,19 @@ def rank_visible_library_hits(
path_mode = pack.get("context_mode") == "progression_path"
stage_learning_goal = (pack.get("stage_learning_goal") or "").strip()
roadmap_stage_match = bool(pack.get("roadmap_stage_match"))
stage_match_brief_raw = pack.get("stage_match_brief")
stage_match_brief: Optional[PlanningSemanticBrief] = None
if isinstance(stage_match_brief_raw, PlanningSemanticBrief):
stage_match_brief = stage_match_brief_raw
elif roadmap_stage_match and stage_learning_goal:
stage_match_brief = build_stage_match_brief(
learning_goal=stage_learning_goal,
anti_patterns=pack.get("stage_anti_patterns"),
success_criteria=pack.get("stage_success_criteria"),
load_profile=pack.get("stage_load_profile"),
phase=step_phase,
path_context_note=pack.get("path_context_note"),
)
last_planned_skills: Set[int] = set()
planned_ids = pack.get("planned_exercise_ids") or []
@ -229,7 +245,11 @@ def rank_visible_library_hits(
skills_by_ex = _load_skill_sets_chunked(cur, cand_ids)
goals_by_ex: Dict[int, str] = {}
variants_by_ex: Dict[int, List[str]] = {}
if semantic_brief and semantic_brief.semantic_strength > 0.05:
need_exercise_semantic_text = (
(semantic_brief and semantic_brief.semantic_strength > 0.05)
or (stage_match_brief and stage_match_brief.semantic_strength > 0.05)
)
if need_exercise_semantic_text:
goals_by_ex = _load_exercise_goals_chunked(cur, cand_ids)
variants_by_ex = _load_variant_names_chunked(cur, cand_ids)
@ -270,52 +290,75 @@ def rank_visible_library_hits(
emp, target, intent=intent
)
title_s = str(row.get("title") or "")
summary_s = str(row.get("summary") or "")
goal_s = goals_by_ex.get(eid, "")
semantic_score = 0.0
semantic_reasons: List[str] = []
if semantic_brief and semantic_brief.semantic_strength > 0.05:
semantic_score, semantic_reasons = score_exercise_semantic_relevance(
title=str(row.get("title") or ""),
summary=str(row.get("summary") or ""),
goal=goals_by_ex.get(eid, ""),
title=title_s,
summary=summary_s,
goal=goal_s,
variant_names=variants_by_ex.get(eid, []),
brief=semantic_brief,
step_phase=step_phase,
)
stage_semantic_score = 0.0
stage_semantic_reasons: List[str] = []
if stage_match_brief and stage_match_brief.semantic_strength > 0.05:
stage_semantic_score, stage_semantic_reasons = score_exercise_stage_fit(
title=title_s,
summary=summary_s,
goal=goal_s,
variant_names=variants_by_ex.get(eid, []),
stage_brief=stage_match_brief,
step_phase=step_phase,
)
effective_semantic = (
stage_semantic_score
if roadmap_stage_match and stage_match_brief
else semantic_score
)
score_penalty = 0.0
stage_match_reason: Optional[str] = None
if (
path_mode
and not roadmap_stage_match
and semantic_brief
and semantic_brief.semantic_strength >= 0.55
and not exercise_passes_path_semantic_gate(
semantic_score=semantic_score,
title=str(row.get("title") or ""),
summary=str(row.get("summary") or ""),
goal=goals_by_ex.get(eid, ""),
title=title_s,
summary=summary_s,
goal=goal_s,
brief=semantic_brief,
strict=True,
)
):
score_penalty = 0.42
if roadmap_stage_match and stage_learning_goal:
title_s = str(row.get("title") or "")
summary_s = str(row.get("summary") or "")
goal_s = goals_by_ex.get(eid, "")
if exercise_passes_stage_learning_goal_gate(
if exercise_passes_stage_fit(
learning_goal=stage_learning_goal,
title=title_s,
summary=summary_s,
goal=goal_s,
semantic_score=semantic_score,
stage_brief=stage_match_brief,
stage_semantic_score=stage_semantic_score,
anti_patterns=pack.get("stage_anti_patterns"),
step_phase=step_phase,
):
score_penalty = max(0.0, score_penalty - 0.08)
score_penalty = max(0.0, score_penalty - 0.10)
stage_match_reason = "Passt zum Stufen-Lernziel"
else:
score_penalty += 0.35
score_penalty += 0.48
score = (
weights.get("semantic", 0.0) * semantic_score
weights.get("semantic", 0.0) * effective_semantic
+ weights["fulltext"] * ft_norm
+ weights["progression"] * prog_hit
+ weights["skill"] * skill_sim
@ -329,7 +372,11 @@ def rank_visible_library_hits(
reasons: List[str] = []
if stage_match_reason:
reasons.append(stage_match_reason)
if semantic_score >= 0.35 and semantic_reasons:
if roadmap_stage_match and stage_semantic_score >= 0.30 and stage_semantic_reasons:
for sr in stage_semantic_reasons:
if sr not in reasons:
reasons.append(sr)
elif semantic_score >= 0.35 and semantic_reasons:
for sr in semantic_reasons:
if sr not in reasons:
reasons.append(sr)
@ -365,6 +412,8 @@ def rank_visible_library_hits(
"score": round(max(0.0, min(1.0, score)), 4),
"reasons": reasons,
"semantic_score": round(semantic_score, 4),
"stage_semantic_score": round(stage_semantic_score, 4),
"goal": goal_s,
}
)
succ_variants = pack.get("progression_successor_variants") or {}

View File

@ -9,6 +9,7 @@ from __future__ import annotations
import json
import logging
import re
from dataclasses import dataclass, field
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple
from pydantic import BaseModel, Field, field_validator
@ -462,7 +463,7 @@ def score_exercise_semantic_relevance(
core_hits = sum(1 for ph in core if _phrase_in_blob(ph, blob))
must_hits = sum(1 for ph in must if _phrase_in_blob(ph, blob))
exclude_hits = sum(1 for ph in exclude if _phrase_in_blob(ph, blob))
exclude_hits = sum(1 for ph in exclude if _phrase_excluded_in_blob(ph, blob))
score = 0.0
if core:
@ -623,9 +624,82 @@ _STAGE_GOAL_STOPWORDS = _QUERY_STOPWORDS | frozenset(
)
def _significant_stage_tokens(learning_goal: str) -> List[str]:
"""Wörter aus Stufen-Lernziel für Text-Match (ohne Füllwörter)."""
raw = re.findall(r"[a-zäöüß]{4,}", _normalize_phrase(learning_goal), flags=re.IGNORECASE)
_STAGE_NEGATION_PATTERNS = (
r"\bohne\s+([^,.;]+)",
r"\bkein(?:e|en|er|em)?\s+([^,.;]+)",
r"\bnicht\s+([^,.;]+)",
)
# Aus „ohne Tritttechnik“ etc. — erweiterte Treffer im Übungstext
_STAGE_EXCLUDE_ALIASES: Dict[str, Tuple[str, ...]] = {
"tritttechnik": (
"tritttechnik",
"trittpraezision",
"trittpräzision",
"tritt praesision",
"tritt-präzision",
"kicktechnik",
"tritt ausführung",
"tritt ausfuehrung",
),
"kumite": ("kumite", "partnerkampf", "freikampf", "jiyu kumite"),
"kraftuebung": ("kraftuebung", "kraftübung", "krafttraining", "kraftübungen"),
"anwendung": ("kumite anwendung", "kampfanwendung"),
}
_STAGE_FOCUS_TOKENS = frozenset(
{
"koordination",
"absprung",
"beinhebung",
"landung",
"sprung",
"sprungphase",
"balance",
"gleichgewicht",
"timing",
"vorbereitung",
"athletik",
"mobilitaet",
"mobilität",
"stabilisation",
"stabilisierung",
}
)
@dataclass
class StageGoalConstraints:
    """Positive and negative matching constraints parsed from one stage goal.

    A default-constructed instance carries no constraints at all.
    """

    # Significant words of the learning goal, negated segments removed.
    positive_tokens: List[str] = field(default_factory=list)
    # Phrases (including expanded aliases) an exercise text must not match.
    exclude_phrases: List[str] = field(default_factory=list)
    # True when the learning goal itself contained a negation pattern.
    has_negation: bool = False
    # True when the goal has a focus token or a negation.
    strict_positive: bool = False
def _expand_stage_exclude_phrase(phrase: str) -> List[str]:
    """Expand one exclusion phrase into the variants checked against exercise text.

    The result starts with the normalized phrase, followed by its space-free
    form (when different) and then every alias from
    ``_STAGE_EXCLUDE_ALIASES`` whose key overlaps the phrase. At most 12
    entries are returned; a phrase that normalizes to an empty string
    yields ``[]``.
    """
    norm = _normalize_phrase(phrase)
    if not norm:
        return []
    variants: List[str] = [norm]
    compact = norm.replace(" ", "")
    if compact and compact not in variants:
        variants.append(compact)
    # Alias keys are spelled in ASCII (e.g. "kraftuebung"). The normalized
    # phrase appears to keep umlauts (this module transliterates tokens only
    # after normalizing — confirm against _normalize_phrase), so compare a
    # transliterated form as well; otherwise "kraftübung" never reaches its
    # aliases.
    folded = norm.replace("ä", "ae").replace("ö", "oe").replace("ü", "ue")
    for key, aliases in _STAGE_EXCLUDE_ALIASES.items():
        overlaps = key in norm or norm in key or key in folded or folded in key
        if not overlaps:
            continue
        for alias in aliases:
            normalized_alias = _normalize_phrase(alias)
            if normalized_alias and normalized_alias not in variants:
                variants.append(normalized_alias)
    return variants[:12]
def _significant_stage_tokens(learning_goal: str, *, strip_negated: bool = True) -> List[str]:
"""Wörter aus Stufen-Lernziel für Text-Match (ohne Füllwörter, ohne Negationssegmente)."""
text = _normalize_phrase(learning_goal)
if strip_negated:
for pat in _STAGE_NEGATION_PATTERNS:
text = re.sub(pat, " ", text)
raw = re.findall(r"[a-zäöüß]{4,}", text, flags=re.IGNORECASE)
out: List[str] = []
for w in raw:
low = w.lower().replace("ä", "ae").replace("ö", "oe").replace("ü", "ue")
@ -636,19 +710,245 @@ def _significant_stage_tokens(learning_goal: str) -> List[str]:
return out[:10]
def parse_stage_goal_constraints(
    learning_goal: str,
    anti_patterns: Optional[Sequence[str]] = None,
) -> StageGoalConstraints:
    """Derive positive tokens and exclusions for one roadmap stage.

    Exclusions come from negated segments of the learning goal (matched by
    ``_STAGE_NEGATION_PATTERNS``) and from the explicit ``anti_patterns``;
    each one is expanded via ``_expand_stage_exclude_phrase``. A learning
    goal shorter than three characters yields empty constraints, in which
    case ``anti_patterns`` are not evaluated either.
    """
    goal_text = (learning_goal or "").strip()
    if len(goal_text) < 3:
        return StageGoalConstraints()

    normalized_goal = _normalize_phrase(goal_text)
    collected: List[str] = []
    negation_found = False

    # 1) Negated segments inside the goal itself ("ohne ...", "kein ...").
    for pattern in _STAGE_NEGATION_PATTERNS:
        for match in re.finditer(pattern, normalized_goal):
            negation_found = True
            segment = (match.group(1) or "").strip()
            if segment:
                collected.extend(_expand_stage_exclude_phrase(segment))

    # 2) Explicit anti-patterns of the stage.
    for entry in anti_patterns or []:
        cleaned = _normalize_phrase(str(entry or ""))
        if cleaned:
            collected.extend(_expand_stage_exclude_phrase(cleaned))

    positive = _significant_stage_tokens(goal_text, strip_negated=True)
    has_focus_token = any(token in _STAGE_FOCUS_TOKENS for token in positive)

    # Order-preserving de-duplication; empty entries are dropped.
    unique_excludes = [item for item in dict.fromkeys(collected) if item]

    return StageGoalConstraints(
        positive_tokens=positive,
        exclude_phrases=unique_excludes[:16],
        has_negation=negation_found,
        strict_positive=has_focus_token or negation_found,
    )
def _phrase_excluded_in_blob(phrase: str, blob: str) -> bool:
    """Report whether ``phrase`` occurs in ``blob`` without being negated there.

    Returns ``False`` when either argument is empty or the phrase is not
    found. It also returns ``False`` as soon as any negated segment of the
    blob ("ohne ...", "kein ...", "nicht ...") overlaps the phrase — even if
    the phrase additionally occurs un-negated elsewhere in the blob.
    """
    if not (phrase and blob):
        return False
    if not _phrase_in_blob(phrase, blob):
        return False

    needle = _normalize_phrase(phrase)
    for pattern in _STAGE_NEGATION_PATTERNS:
        for match in re.finditer(pattern, blob):
            segment = _normalize_phrase(match.group(1) or "")
            if segment and (
                needle in segment
                or segment in needle
                or _phrase_in_blob(needle, segment)
            ):
                # The exercise text itself rules the topic out.
                return False
    return True
def _blob_matches_stage_excludes(blob: str, exclude_phrases: Sequence[str]) -> bool:
    """Return True if at least one exclusion phrase hits ``blob`` un-negated."""
    return any(
        _phrase_excluded_in_blob(candidate, blob) for candidate in exclude_phrases
    )
_MIN_STAGE_FIT_SEMANTIC = 0.30
_MIN_STAGE_FIT_RELAXED = 0.20
def build_stage_match_brief(
    *,
    learning_goal: str,
    anti_patterns: Optional[Sequence[str]] = None,
    success_criteria: Optional[Sequence[str]] = None,
    load_profile: Optional[Sequence[str]] = None,
    phase: Optional[str] = None,
    path_context_note: Optional[str] = None,
) -> PlanningSemanticBrief:
    """Build a stage-centred semantic brief, independent of the overall path topic.

    Intended for roadmap matching, where an exercise's title, summary and
    goal are scored against the stage rather than the global theme.

    ``must_phrases`` collects, in this order and capped at 12 entries: the
    positive goal tokens, the normalized learning goal (max. 120 chars),
    each success criterion (max. 100 chars) and each load-profile entry
    (max. 60 chars). ``exclude_phrases`` come from the parsed goal
    constraints. The retrieval query is the normalized goal plus the
    optional path context note. ``primary_topic`` is left empty.

    A learning goal shorter than three characters yields a brief with
    ``semantic_strength=0.0``; otherwise the strength is fixed at 0.78.
    """
    lg = (learning_goal or "").strip()
    if len(lg) < 3:
        return PlanningSemanticBrief(semantic_strength=0.0)

    constraints = parse_stage_goal_constraints(lg, anti_patterns)
    norm_lg = _normalize_phrase(lg)

    must: List[str] = []

    def _add_must(value: str, limit: Optional[int] = None) -> None:
        # Truncate first, then de-duplicate: comparing the untruncated text
        # would let entries that only differ beyond the limit be stored
        # twice and waste slots of the 12-entry budget.
        clipped = value[:limit] if limit is not None else value
        if clipped and clipped not in must:
            must.append(clipped)

    for token in constraints.positive_tokens:
        _add_must(token)
    _add_must(norm_lg, 120)
    for raw in success_criteria or []:
        _add_must(_normalize_phrase(str(raw or "")), 100)
    for raw in load_profile or []:
        _add_must(_normalize_phrase(str(raw or "")), 60)

    retrieval_parts = [norm_lg]
    if path_context_note:
        note = _normalize_phrase(path_context_note)[:200]
        if note:
            retrieval_parts.append(note)

    arc: List[str] = []
    ph = (phase or "").strip().lower()
    if ph:
        arc.append(ph)

    return PlanningSemanticBrief(
        primary_topic="",
        topic_type="focus",
        must_phrases=must[:12],
        exclude_phrases=list(constraints.exclude_phrases)[:12],
        development_arc=arc[:4],
        retrieval_query=" ".join(p for p in retrieval_parts if p)[:500],
        semantic_strength=0.78,
        rationale="stage_match_brief",
    )
def score_exercise_stage_fit(
    *,
    title: str,
    summary: str,
    goal: str,
    stage_brief: PlanningSemanticBrief,
    variant_names: Optional[Sequence[str]] = None,
    step_phase: Optional[str] = None,
) -> Tuple[float, List[str]]:
    """Score how well an exercise (title + summary + goal) fits a stage brief.

    Starts from ``score_exercise_semantic_relevance`` and adds up to 0.28
    in proportion to how many single-word must-phrases (first six, at least
    four characters) appear in the exercise text. Returns the score clamped
    to [0, 1] and rounded to four decimals, plus at most four reasons.
    """
    base_score, base_reasons = score_exercise_semantic_relevance(
        title=title,
        summary=summary,
        goal=goal,
        variant_names=variant_names or [],
        brief=stage_brief,
        step_phase=step_phase,
    )
    text_blob = _blob_from_fields(title, summary, goal, variant_names or [])

    single_words = [
        phrase
        for phrase in (stage_brief.must_phrases or [])
        if phrase and " " not in phrase and len(phrase) >= 4
    ][:6]

    total = base_score
    notes = base_reasons
    if single_words:
        matched = sum(1 for word in single_words if _phrase_in_blob(word, text_blob))
        coverage = matched / len(single_words)
        bonus = 0.28 * coverage
        if bonus > 0:
            total = min(1.0, total + bonus)
        # At least half of the focus words (and at least one) were found.
        if matched >= max(1, len(single_words) // 2):
            notes = ["Stufen-Schwerpunkte im Übungstext", *notes]

    clamped = max(0.0, min(1.0, round(total, 4)))
    return clamped, notes[:4]
def exercise_passes_stage_fit(
    *,
    learning_goal: str,
    title: str,
    summary: str = "",
    goal: str = "",
    stage_brief: Optional[PlanningSemanticBrief] = None,
    stage_semantic_score: Optional[float] = None,
    anti_patterns: Optional[Sequence[str]] = None,
    step_phase: Optional[str] = None,
    min_stage_semantic: float = _MIN_STAGE_FIT_SEMANTIC,
    relaxed: bool = False,
) -> bool:
    """General stage-fit gate: full exercise text versus the stage brief.

    Decision order:
      1. A learning goal shorter than three characters always passes.
      2. Any un-negated hit of an exclusion phrase rejects the exercise.
      3. Otherwise the stage-semantic score (given, or computed from the
         brief) must reach the threshold — ``_MIN_STAGE_FIT_RELAXED`` when
         ``relaxed`` is set, else ``min_stage_semantic``.
    """
    goal_text = (learning_goal or "").strip()
    if len(goal_text) < 3:
        return True

    text_blob = _blob_from_fields(title, summary, goal, [])
    parsed = parse_stage_goal_constraints(goal_text, anti_patterns)
    excludes = parsed.exclude_phrases
    if excludes and _blob_matches_stage_excludes(text_blob, excludes):
        return False

    effective_brief = stage_brief or build_stage_match_brief(
        learning_goal=goal_text,
        anti_patterns=anti_patterns,
    )

    fit_score = stage_semantic_score
    if fit_score is None:
        fit_score, _ = score_exercise_stage_fit(
            title=title,
            summary=summary,
            goal=goal,
            stage_brief=effective_brief,
            step_phase=step_phase,
        )

    if relaxed:
        required = _MIN_STAGE_FIT_RELAXED
    else:
        required = min_stage_semantic
    return float(fit_score or 0.0) >= required
def apply_stage_match_retrieval_weights(brief: PlanningSemanticBrief) -> Dict[str, float]:
    """Return the ranking weights for roadmap-stage matching.

    Stage semantics dominate the mix. The ``brief`` argument is currently
    not consulted; the same weights are returned for every brief, as a
    fresh dict on each call.
    """
    return dict(
        semantic=0.58,
        fulltext=0.14,
        profile=0.18,
        progression=0.04,
        skill=0.04,
        plan=0.02,
        # Negative weights: penalties for repeated units / groups.
        repeat_unit=-0.40,
        repeat_group=-0.15,
    )
def semantic_brief_for_stage(
brief: PlanningSemanticBrief,
*,
learning_goal: str,
phase: Optional[str] = None,
anti_patterns: Optional[Sequence[str]] = None,
) -> PlanningSemanticBrief:
"""Brief um Stufen-Lernziel erweitern — für Roadmap-Match pro Major Step."""
"""Legacy: globalen Brief anreichern — bevorzugt build_stage_match_brief für Roadmap-Match."""
lg = _normalize_phrase(learning_goal)
if not lg:
return brief
constraints = parse_stage_goal_constraints(learning_goal, anti_patterns)
must = list(brief.must_phrases or [])
for token in constraints.positive_tokens[:4]:
if token not in must:
must.append(token)
if lg not in must:
must.insert(0, lg[:120])
exclude = list(brief.exclude_phrases or [])
for item in constraints.exclude_phrases:
if item not in exclude:
exclude.append(item)
arc = list(brief.development_arc or [])
ph = (phase or "").strip().lower()
if ph and ph not in arc:
@ -657,6 +957,7 @@ def semantic_brief_for_stage(
return brief.model_copy(
update={
"must_phrases": must[:12],
"exclude_phrases": exclude[:12],
"development_arc": arc[:8],
"semantic_strength": min(1.0, strength),
}
@ -672,33 +973,24 @@ def exercise_passes_stage_learning_goal_gate(
semantic_score: float = 0.0,
min_semantic: float = 0.20,
relaxed: bool = False,
anti_patterns: Optional[Sequence[str]] = None,
stage_brief: Optional[PlanningSemanticBrief] = None,
stage_semantic_score: Optional[float] = None,
step_phase: Optional[str] = None,
) -> bool:
"""Roadmap-Stufe: Übung muss zum Stufen-Lernziel passen, nicht nur zum Gesamtthema."""
lg = (learning_goal or "").strip()
if len(lg) < 3:
return True
blob = _blob_from_fields(title, summary, goal, [])
norm_lg = _normalize_phrase(lg)
if _phrase_in_blob(norm_lg, blob):
return True
tokens = _significant_stage_tokens(lg)
if not tokens:
threshold = 0.12 if relaxed else min_semantic
return semantic_score >= threshold
hits = sum(1 for t in tokens if _phrase_in_blob(t, blob))
if len(tokens) <= 2:
required = 1
else:
required = max(2, (len(tokens) + 1) // 2)
if hits >= required:
return True
threshold = 0.14 if relaxed else min_semantic
return semantic_score >= threshold
"""Roadmap-Stufe: delegiert an exercise_passes_stage_fit (Titel + Summary + Goal)."""
del semantic_score, min_semantic
return exercise_passes_stage_fit(
learning_goal=learning_goal,
title=title,
summary=summary,
goal=goal,
stage_brief=stage_brief,
stage_semantic_score=stage_semantic_score,
anti_patterns=anti_patterns,
step_phase=step_phase,
relaxed=relaxed,
)
def exercise_passes_path_semantic_gate(
@ -739,7 +1031,9 @@ def pick_best_path_hit(
*,
semantic_brief: Optional[PlanningSemanticBrief] = None,
stage_learning_goal: Optional[str] = None,
stage_anti_patterns: Optional[Sequence[str]] = None,
roadmap_stage_match: bool = False,
stage_match_brief: Optional[PlanningSemanticBrief] = None,
) -> Optional[Dict[str, Any]]:
"""Gestufte Auswahl: strikt → relaxed → optional Notfall-Fallback."""
if not hits:
@ -747,6 +1041,13 @@ def pick_best_path_hit(
stage_goal = (stage_learning_goal or "").strip()
stage_brief: Optional[PlanningSemanticBrief] = stage_match_brief
if roadmap_stage_match and stage_goal and stage_brief is None:
stage_brief = build_stage_match_brief(
learning_goal=stage_goal,
anti_patterns=stage_anti_patterns,
)
def _scan(*, strict: bool) -> Optional[Dict[str, Any]]:
best: Optional[Dict[str, Any]] = None
best_key: Tuple[float, float] = (-1.0, -1.0)
@ -754,28 +1055,38 @@ def pick_best_path_hit(
eid = int(hit["id"])
if eid in used_exercise_ids:
continue
sem = float(hit.get("semantic_score") or 0.0)
title = str(hit.get("title") or "")
summary = str(hit.get("summary") or "")
goal_text = str(hit.get("goal") or hit.get("exercise_goal") or "")
sem = float(hit.get("semantic_score") or 0.0)
stage_sem = float(hit.get("stage_semantic_score") or sem)
if roadmap_stage_match and stage_goal:
if not exercise_passes_stage_fit(
learning_goal=stage_goal,
title=title,
summary=summary,
goal=goal_text,
stage_brief=stage_brief,
stage_semantic_score=stage_sem,
anti_patterns=stage_anti_patterns,
relaxed=not strict,
):
continue
else:
if semantic_brief and not exercise_passes_path_semantic_gate(
semantic_score=sem,
title=title,
summary=summary,
goal="",
goal=goal_text,
brief=semantic_brief,
strict=strict,
):
continue
if stage_goal and not exercise_passes_stage_learning_goal_gate(
learning_goal=stage_goal,
title=title,
summary=summary,
semantic_score=sem,
relaxed=not strict,
):
continue
score = float(hit.get("score") or 0.0)
key = (sem, score)
rank_sem = stage_sem if roadmap_stage_match and stage_goal else sem
key = (rank_sem, score)
if key > best_key:
best_key = key
best = hit
@ -820,9 +1131,15 @@ __all__ = [
"build_semantic_brief",
"enrich_target_with_semantic_expectations",
"exercise_passes_path_semantic_gate",
"StageGoalConstraints",
"apply_stage_match_retrieval_weights",
"build_stage_match_brief",
"exercise_passes_stage_fit",
"exercise_passes_stage_learning_goal_gate",
"merge_semantic_brief_llm",
"parse_stage_goal_constraints",
"pick_best_path_hit",
"score_exercise_stage_fit",
"semantic_brief_for_stage",
"resolve_semantic_skill_weights",
"score_exercise_semantic_relevance",

View File

@ -1,7 +1,9 @@
"""Tests Roadmap-Stufen-Match — Gate gegen themenfremde Übungen."""
from planning_exercise_semantics import (
build_stage_match_brief,
exercise_passes_stage_learning_goal_gate,
pick_best_path_hit,
score_exercise_stage_fit,
semantic_brief_for_stage,
build_semantic_brief,
)
@ -35,10 +37,37 @@ def test_semantic_brief_for_stage_adds_learning_goal():
assert "hüftmobilität und kammerhaltung" in stage.must_phrases[0]
def test_build_stage_match_brief_uses_stage_tokens_not_global_topic():
    """The stage brief carries stage tokens only — no global topic leaks in."""
    stage_brief = build_stage_match_brief(
        learning_goal="Koordination von Absprung und Beinhebung ohne Tritttechnik",
        phase="vertiefung",
    )
    joined_must = " ".join(stage_brief.must_phrases or []).lower()
    assert "mawashi" not in joined_must
    assert "absprung" in joined_must
    topic = (stage_brief.primary_topic or "").strip()
    assert not topic
def test_stage_fit_prefers_goal_over_misleading_title():
    """A matching exercise goal must outscore a kick-focused exercise."""
    learning_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
    brief = build_stage_match_brief(learning_goal=learning_goal)

    kick_exercise = {
        "title": "Mawashi Geri Trittpräzision",
        "summary": "Kicktechnik",
        "goal": "Präzision im Tritt und Hüftarbeit",
    }
    coordination_exercise = {
        "title": "Allgemeines Sprungtraining",
        "summary": "Athletik",
        "goal": "Absprung, Beinhebung und Landung koordinieren — ohne Trittausführung",
    }

    kick_result = score_exercise_stage_fit(stage_brief=brief, **kick_exercise)
    coordination_result = score_exercise_stage_fit(
        stage_brief=brief, **coordination_exercise
    )
    assert coordination_result[0] > kick_result[0]
def test_pick_best_path_hit_roadmap_stage_no_weak_fallback():
brief = build_semantic_brief("Mae Geri Perfektion")
stage_brief = semantic_brief_for_stage(
brief,
stage_brief = build_stage_match_brief(
learning_goal="Hüftmobilität für Mae Geri",
phase="grundlage",
)
@ -69,9 +98,7 @@ def test_pick_best_path_hit_roadmap_stage_no_weak_fallback():
def test_pick_best_path_hit_roadmap_stage_picks_relevant():
brief = build_semantic_brief("Mae Geri Perfektion")
stage_brief = semantic_brief_for_stage(
brief,
stage_brief = build_stage_match_brief(
learning_goal="Hüftmobilität für Mae Geri",
phase="grundlage",
)
@ -94,3 +121,63 @@ def test_pick_best_path_hit_roadmap_stage_picks_relevant():
)
assert chosen is not None
assert int(chosen["id"]) == 2
def test_stage_gate_rejects_tritt_when_goal_says_ohne_tritttechnik():
    """Regression: jumped Mawashi — coordination slot without kick technique."""
    learning_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
    passed = exercise_passes_stage_learning_goal_gate(
        learning_goal=learning_goal,
        title="Verbesserung der Trittpräzision des Mawashi Geri und der Hüftbewegung",
        summary="Präzision und Hüftarbeit im Stand",
        semantic_score=0.72,
    )
    assert not passed
def test_stage_gate_accepts_absprung_drill_not_kick_focus():
    """A take-off drill that itself negates kicking passes the stage gate."""
    learning_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
    passed = exercise_passes_stage_learning_goal_gate(
        learning_goal=learning_goal,
        title="Sprungkoordination — Absprung und Beinhebung",
        summary="Ohne Trittausführung, Fokus Gleichgewicht und Timing",
        semantic_score=0.35,
    )
    assert passed
def test_pick_best_rejects_mawashi_tritt_precision_for_coordination_slot():
    """The higher-scored kick exercise must lose against the coordination drill."""
    learning_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
    brief = build_stage_match_brief(learning_goal=learning_goal, phase="vertiefung")

    kick_hit = {
        "id": 99,
        "title": "Verbesserung der Trittpräzision des Mawashi Geri und der Hüftbewegung",
        "summary": "Tritttechnik und Hüfte im Stand",
        "score": 0.91,
        "semantic_score": 0.68,
    }
    coordination_hit = {
        "id": 100,
        "title": "Absprung und Beinhebung — Koordination ohne Kick",
        "summary": "Sprungvorbereitung, kein Tritt",
        "score": 0.62,
        "semantic_score": 0.41,
    }

    picked = pick_best_path_hit(
        [kick_hit, coordination_hit],
        set(),
        semantic_brief=brief,
        stage_learning_goal=learning_goal,
        roadmap_stage_match=True,
    )
    assert picked is not None
    assert int(picked["id"]) == 100
def test_parse_stage_goal_constraints_extracts_ohne_tritttechnik():
    """"ohne Tritttechnik" becomes an exclusion; the rest stays positive."""
    from planning_exercise_semantics import parse_stage_goal_constraints

    parsed = parse_stage_goal_constraints(
        "Koordination von Absprung und Beinhebung ohne Tritttechnik"
    )
    assert parsed.has_negation
    assert "absprung" in parsed.positive_tokens
    assert any("tritt" in phrase for phrase in parsed.exclude_phrases)

View File

@ -1,6 +1,6 @@
# Shinkan Jinkendo Version Information
APP_VERSION = "0.8.218"
APP_VERSION = "0.8.220"
BUILD_DATE = "2026-06-07"
DB_SCHEMA_VERSION = "20260607088"
@ -53,6 +53,22 @@ MODULE_VERSIONS = {
}
CHANGELOG = [
{
"version": "0.8.220",
"date": "2026-06-07",
"changes": [
"Roadmap-Stufen-Match: build_stage_match_brief + stage_semantic_score über Titel, Summary und Goal.",
"Retriever lädt Übungsziele immer bei Stufen-Match; Ranking nach Stufen-Fit statt Gesamtthema.",
],
},
{
"version": "0.8.219",
"date": "2026-06-07",
"changes": [
"Roadmap-Stufen-Gate: Negationen (ohne Tritttechnik) + Pflicht-Treffer Absprung/Beinhebung.",
"anti_patterns in Stufen-Match; Gesamt-Thema allein reicht bei strict_positive nicht mehr.",
],
},
{
"version": "0.8.218",
"date": "2026-06-07",