Enhance Stage Matching and Retrieval Logic in Planning Exercise
All checks were successful
Deploy Development / deploy (push) Successful in 43s
Test Suite / pytest-backend (push) Successful in 44s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 13s
Test Suite / k6 /health Baseline (push) Successful in 34s
Test Suite / playwright-tests (push) Successful in 1m16s

- Introduced `build_stage_match_brief` to create stage-specific semantic briefs, improving roadmap matching accuracy.
- Updated path retrieval logic to differentiate between general and stage-specific semantic weights, enhancing exercise relevance.
- Added support for anti-patterns and success criteria in stage matching, allowing for more nuanced exercise selection.
- Enhanced tests to validate new stage matching features and ensure correct functionality against learning goals.
- Incremented application version to reflect these updates.
This commit is contained in:
Lars 2026-06-10 17:02:21 +02:00
parent 18547613ea
commit 07e147bc76
6 changed files with 591 additions and 81 deletions

View File

@ -32,13 +32,14 @@ from planning_exercise_retrieval import run_multistage_planning_retrieval
from planning_exercise_semantics import ( from planning_exercise_semantics import (
PlanningSemanticBrief, PlanningSemanticBrief,
apply_path_retrieval_weights, apply_path_retrieval_weights,
apply_stage_match_retrieval_weights,
brief_to_summary_dict, brief_to_summary_dict,
build_semantic_brief, build_semantic_brief,
build_stage_match_brief,
enrich_target_with_semantic_expectations, enrich_target_with_semantic_expectations,
exercise_passes_path_semantic_gate, exercise_passes_path_semantic_gate,
pick_best_path_hit, pick_best_path_hit,
resolve_semantic_skill_weights, resolve_semantic_skill_weights,
semantic_brief_for_stage,
step_phase_for_index, step_phase_for_index,
step_retrieval_query, step_retrieval_query,
try_enrich_semantic_brief_with_llm, try_enrich_semantic_brief_with_llm,
@ -185,14 +186,18 @@ def _pick_best_path_hit(
*, *,
semantic_brief: Optional[PlanningSemanticBrief] = None, semantic_brief: Optional[PlanningSemanticBrief] = None,
stage_learning_goal: Optional[str] = None, stage_learning_goal: Optional[str] = None,
stage_anti_patterns: Optional[List[str]] = None,
roadmap_stage_match: bool = False, roadmap_stage_match: bool = False,
stage_match_brief: Optional[PlanningSemanticBrief] = None,
) -> Optional[Dict[str, Any]]: ) -> Optional[Dict[str, Any]]:
return pick_best_path_hit( return pick_best_path_hit(
hits, hits,
used_exercise_ids, used_exercise_ids,
semantic_brief=semantic_brief, semantic_brief=semantic_brief,
stage_learning_goal=stage_learning_goal, stage_learning_goal=stage_learning_goal,
stage_anti_patterns=stage_anti_patterns,
roadmap_stage_match=roadmap_stage_match, roadmap_stage_match=roadmap_stage_match,
stage_match_brief=stage_match_brief,
) )
@ -292,6 +297,11 @@ def _run_path_step_retrieval(
step_phase_override: Optional[str] = None, step_phase_override: Optional[str] = None,
step_target_profile_override: Optional[PlanningTargetProfile] = None, step_target_profile_override: Optional[PlanningTargetProfile] = None,
stage_learning_goal: Optional[str] = None, stage_learning_goal: Optional[str] = None,
stage_anti_patterns: Optional[List[str]] = None,
stage_match_brief: Optional[PlanningSemanticBrief] = None,
stage_success_criteria: Optional[List[str]] = None,
stage_load_profile: Optional[List[str]] = None,
path_context_note: Optional[str] = None,
) -> Tuple[List[Dict[str, Any]], PlanningTargetProfile, Dict[str, Any], str]: ) -> Tuple[List[Dict[str, Any]], PlanningTargetProfile, Dict[str, Any], str]:
step_query = step_query_override or step_retrieval_query( step_query = step_query_override or step_retrieval_query(
semantic_brief, goal_query, step_index, max_steps semantic_brief, goal_query, step_index, max_steps
@ -328,7 +338,12 @@ def _run_path_step_retrieval(
"path_step_phase": step_phase_override "path_step_phase": step_phase_override
or step_phase_for_index(semantic_brief, step_index, max_steps), or step_phase_for_index(semantic_brief, step_index, max_steps),
"stage_learning_goal": (stage_learning_goal or "").strip() or None, "stage_learning_goal": (stage_learning_goal or "").strip() or None,
"stage_anti_patterns": list(stage_anti_patterns or []),
"roadmap_stage_match": bool((stage_learning_goal or "").strip()), "roadmap_stage_match": bool((stage_learning_goal or "").strip()),
"stage_match_brief": stage_match_brief,
"stage_success_criteria": list(stage_success_criteria or []),
"stage_load_profile": list(stage_load_profile or []),
"path_context_note": (path_context_note or "").strip() or None,
} }
pack = apply_progression_context_to_pack( pack = apply_progression_context_to_pack(
cur, cur,
@ -383,7 +398,10 @@ def _run_path_step_retrieval(
has_planning_reference=has_plan_ref, has_planning_reference=has_plan_ref,
) )
weights = apply_path_retrieval_weights(semantic_brief) if pack.get("roadmap_stage_match"):
weights = apply_stage_match_retrieval_weights(semantic_brief)
else:
weights = apply_path_retrieval_weights(semantic_brief)
profile_id = tenant.profile_id profile_id = tenant.profile_id
role = tenant.global_role role = tenant.global_role
@ -490,6 +508,8 @@ def _annotate_roadmap_step(
step["roadmap_major_step_index"] = stage_spec.major_step_index step["roadmap_major_step_index"] = stage_spec.major_step_index
step["roadmap_phase"] = major_step.phase if major_step else None step["roadmap_phase"] = major_step.phase if major_step else None
step["roadmap_learning_goal"] = learning_goal or None step["roadmap_learning_goal"] = learning_goal or None
if stage_spec.anti_patterns:
step["roadmap_anti_patterns"] = list(stage_spec.anti_patterns)
step["roadmap_match_source"] = "stage_spec" step["roadmap_match_source"] = "stage_spec"
if skill_expectations: if skill_expectations:
step["skill_expectations"] = skill_expectations step["skill_expectations"] = skill_expectations
@ -569,10 +589,22 @@ def _build_steps_roadmap_first(
) )
step_kind = resolve_step_exercise_kind_filter(stage_spec, body.exercise_kind_any) step_kind = resolve_step_exercise_kind_filter(stage_spec, body.exercise_kind_any)
stage_goal = (stage_spec.learning_goal or "").strip() stage_goal = (stage_spec.learning_goal or "").strip()
stage_brief = semantic_brief_for_stage( stage_anti = list(stage_spec.anti_patterns or [])
semantic_brief, path_context_note = None
if rs_dump:
ctx_parts = [
str(rs_dump.get("start_situation") or "").strip()[:120],
str(rs_dump.get("target_state") or "").strip()[:120],
str(rs_dump.get("roadmap_notes") or "").strip()[:120],
]
path_context_note = " ".join(p for p in ctx_parts if p)[:240] or None
stage_match_brief = build_stage_match_brief(
learning_goal=stage_goal, learning_goal=stage_goal,
anti_patterns=stage_anti,
success_criteria=list(stage_spec.success_criteria or []),
load_profile=list(stage_spec.load_profile or []),
phase=major.phase if major else None, phase=major.phase if major else None,
path_context_note=path_context_note,
) )
hits, _, _, _ = _run_path_step_retrieval( hits, _, _, _ = _run_path_step_retrieval(
@ -587,21 +619,28 @@ def _build_steps_roadmap_first(
progression_graph_id=body.progression_graph_id, progression_graph_id=body.progression_graph_id,
include_llm_intent=body.include_llm_intent and step_index == 0, include_llm_intent=body.include_llm_intent and step_index == 0,
exercise_kind_any=step_kind, exercise_kind_any=step_kind,
semantic_brief=stage_brief, semantic_brief=stage_match_brief,
path_target_profile=path_target_profile, path_target_profile=path_target_profile,
path_intent=path_intent, path_intent=path_intent,
step_query_override=step_query, step_query_override=step_query,
step_phase_override=major.phase if major else None, step_phase_override=major.phase if major else None,
step_target_profile_override=step_target, step_target_profile_override=step_target,
stage_learning_goal=stage_goal or None, stage_learning_goal=stage_goal or None,
stage_anti_patterns=stage_anti or None,
stage_match_brief=stage_match_brief,
stage_success_criteria=list(stage_spec.success_criteria or []),
stage_load_profile=list(stage_spec.load_profile or []),
path_context_note=path_context_note,
) )
hit = _pick_best_path_hit( hit = _pick_best_path_hit(
hits, hits,
used, used,
semantic_brief=stage_brief, semantic_brief=stage_match_brief,
stage_learning_goal=stage_goal or None, stage_learning_goal=stage_goal or None,
stage_anti_patterns=stage_anti or None,
roadmap_stage_match=True, roadmap_stage_match=True,
stage_match_brief=stage_match_brief,
) )
if not hit: if not hit:

View File

@ -426,12 +426,14 @@ def detect_off_topic_steps(
brief=step_brief, brief=step_brief,
step_phase=phase, step_phase=phase,
) )
stage_anti = list(step.get("roadmap_anti_patterns") or [])
if stage_goal and not exercise_passes_stage_learning_goal_gate( if stage_goal and not exercise_passes_stage_learning_goal_gate(
learning_goal=stage_goal, learning_goal=stage_goal,
title=bundle["title"], title=bundle["title"],
summary=bundle["summary"], summary=bundle["summary"],
goal=bundle["goal"], goal=bundle["goal"],
semantic_score=sem, semantic_score=sem,
anti_patterns=stage_anti or None,
): ):
off_topic.append( off_topic.append(
{ {

View File

@ -14,11 +14,14 @@ from planning_exercise_profiles import (
load_exercise_match_profiles_bulk, load_exercise_match_profiles_bulk,
score_exercise_against_target, score_exercise_against_target,
) )
from exercise_ai import strip_html_to_plain
from planning_exercise_semantics import ( from planning_exercise_semantics import (
PlanningSemanticBrief, PlanningSemanticBrief,
build_stage_match_brief,
exercise_passes_path_semantic_gate, exercise_passes_path_semantic_gate,
exercise_passes_stage_learning_goal_gate, exercise_passes_stage_fit,
score_exercise_semantic_relevance, score_exercise_semantic_relevance,
score_exercise_stage_fit,
) )
_MAX_LIBRARY_ROWS = 8000 _MAX_LIBRARY_ROWS = 8000
@ -149,7 +152,7 @@ def _load_exercise_goals_chunked(cur, exercise_ids: Sequence[int], *, batch: int
ph = ",".join(["%s"] * len(chunk)) ph = ",".join(["%s"] * len(chunk))
cur.execute(f"SELECT id, goal FROM exercises WHERE id IN ({ph})", chunk) cur.execute(f"SELECT id, goal FROM exercises WHERE id IN ({ph})", chunk)
for row in cur.fetchall(): for row in cur.fetchall():
out[int(row["id"])] = str(row.get("goal") or "") out[int(row["id"])] = strip_html_to_plain(row.get("goal"), max_len=1200)
return out return out
@ -203,6 +206,19 @@ def rank_visible_library_hits(
path_mode = pack.get("context_mode") == "progression_path" path_mode = pack.get("context_mode") == "progression_path"
stage_learning_goal = (pack.get("stage_learning_goal") or "").strip() stage_learning_goal = (pack.get("stage_learning_goal") or "").strip()
roadmap_stage_match = bool(pack.get("roadmap_stage_match")) roadmap_stage_match = bool(pack.get("roadmap_stage_match"))
stage_match_brief_raw = pack.get("stage_match_brief")
stage_match_brief: Optional[PlanningSemanticBrief] = None
if isinstance(stage_match_brief_raw, PlanningSemanticBrief):
stage_match_brief = stage_match_brief_raw
elif roadmap_stage_match and stage_learning_goal:
stage_match_brief = build_stage_match_brief(
learning_goal=stage_learning_goal,
anti_patterns=pack.get("stage_anti_patterns"),
success_criteria=pack.get("stage_success_criteria"),
load_profile=pack.get("stage_load_profile"),
phase=step_phase,
path_context_note=pack.get("path_context_note"),
)
last_planned_skills: Set[int] = set() last_planned_skills: Set[int] = set()
planned_ids = pack.get("planned_exercise_ids") or [] planned_ids = pack.get("planned_exercise_ids") or []
@ -229,7 +245,11 @@ def rank_visible_library_hits(
skills_by_ex = _load_skill_sets_chunked(cur, cand_ids) skills_by_ex = _load_skill_sets_chunked(cur, cand_ids)
goals_by_ex: Dict[int, str] = {} goals_by_ex: Dict[int, str] = {}
variants_by_ex: Dict[int, List[str]] = {} variants_by_ex: Dict[int, List[str]] = {}
if semantic_brief and semantic_brief.semantic_strength > 0.05: need_exercise_semantic_text = (
(semantic_brief and semantic_brief.semantic_strength > 0.05)
or (stage_match_brief and stage_match_brief.semantic_strength > 0.05)
)
if need_exercise_semantic_text:
goals_by_ex = _load_exercise_goals_chunked(cur, cand_ids) goals_by_ex = _load_exercise_goals_chunked(cur, cand_ids)
variants_by_ex = _load_variant_names_chunked(cur, cand_ids) variants_by_ex = _load_variant_names_chunked(cur, cand_ids)
@ -270,52 +290,75 @@ def rank_visible_library_hits(
emp, target, intent=intent emp, target, intent=intent
) )
title_s = str(row.get("title") or "")
summary_s = str(row.get("summary") or "")
goal_s = goals_by_ex.get(eid, "")
semantic_score = 0.0 semantic_score = 0.0
semantic_reasons: List[str] = [] semantic_reasons: List[str] = []
if semantic_brief and semantic_brief.semantic_strength > 0.05: if semantic_brief and semantic_brief.semantic_strength > 0.05:
semantic_score, semantic_reasons = score_exercise_semantic_relevance( semantic_score, semantic_reasons = score_exercise_semantic_relevance(
title=str(row.get("title") or ""), title=title_s,
summary=str(row.get("summary") or ""), summary=summary_s,
goal=goals_by_ex.get(eid, ""), goal=goal_s,
variant_names=variants_by_ex.get(eid, []), variant_names=variants_by_ex.get(eid, []),
brief=semantic_brief, brief=semantic_brief,
step_phase=step_phase, step_phase=step_phase,
) )
stage_semantic_score = 0.0
stage_semantic_reasons: List[str] = []
if stage_match_brief and stage_match_brief.semantic_strength > 0.05:
stage_semantic_score, stage_semantic_reasons = score_exercise_stage_fit(
title=title_s,
summary=summary_s,
goal=goal_s,
variant_names=variants_by_ex.get(eid, []),
stage_brief=stage_match_brief,
step_phase=step_phase,
)
effective_semantic = (
stage_semantic_score
if roadmap_stage_match and stage_match_brief
else semantic_score
)
score_penalty = 0.0 score_penalty = 0.0
stage_match_reason: Optional[str] = None stage_match_reason: Optional[str] = None
if ( if (
path_mode path_mode
and not roadmap_stage_match
and semantic_brief and semantic_brief
and semantic_brief.semantic_strength >= 0.55 and semantic_brief.semantic_strength >= 0.55
and not exercise_passes_path_semantic_gate( and not exercise_passes_path_semantic_gate(
semantic_score=semantic_score, semantic_score=semantic_score,
title=str(row.get("title") or ""), title=title_s,
summary=str(row.get("summary") or ""), summary=summary_s,
goal=goals_by_ex.get(eid, ""), goal=goal_s,
brief=semantic_brief, brief=semantic_brief,
strict=True, strict=True,
) )
): ):
score_penalty = 0.42 score_penalty = 0.42
if roadmap_stage_match and stage_learning_goal: if roadmap_stage_match and stage_learning_goal:
title_s = str(row.get("title") or "") if exercise_passes_stage_fit(
summary_s = str(row.get("summary") or "")
goal_s = goals_by_ex.get(eid, "")
if exercise_passes_stage_learning_goal_gate(
learning_goal=stage_learning_goal, learning_goal=stage_learning_goal,
title=title_s, title=title_s,
summary=summary_s, summary=summary_s,
goal=goal_s, goal=goal_s,
semantic_score=semantic_score, stage_brief=stage_match_brief,
stage_semantic_score=stage_semantic_score,
anti_patterns=pack.get("stage_anti_patterns"),
step_phase=step_phase,
): ):
score_penalty = max(0.0, score_penalty - 0.08) score_penalty = max(0.0, score_penalty - 0.10)
stage_match_reason = "Passt zum Stufen-Lernziel" stage_match_reason = "Passt zum Stufen-Lernziel"
else: else:
score_penalty += 0.35 score_penalty += 0.48
score = ( score = (
weights.get("semantic", 0.0) * semantic_score weights.get("semantic", 0.0) * effective_semantic
+ weights["fulltext"] * ft_norm + weights["fulltext"] * ft_norm
+ weights["progression"] * prog_hit + weights["progression"] * prog_hit
+ weights["skill"] * skill_sim + weights["skill"] * skill_sim
@ -329,7 +372,11 @@ def rank_visible_library_hits(
reasons: List[str] = [] reasons: List[str] = []
if stage_match_reason: if stage_match_reason:
reasons.append(stage_match_reason) reasons.append(stage_match_reason)
if semantic_score >= 0.35 and semantic_reasons: if roadmap_stage_match and stage_semantic_score >= 0.30 and stage_semantic_reasons:
for sr in stage_semantic_reasons:
if sr not in reasons:
reasons.append(sr)
elif semantic_score >= 0.35 and semantic_reasons:
for sr in semantic_reasons: for sr in semantic_reasons:
if sr not in reasons: if sr not in reasons:
reasons.append(sr) reasons.append(sr)
@ -365,6 +412,8 @@ def rank_visible_library_hits(
"score": round(max(0.0, min(1.0, score)), 4), "score": round(max(0.0, min(1.0, score)), 4),
"reasons": reasons, "reasons": reasons,
"semantic_score": round(semantic_score, 4), "semantic_score": round(semantic_score, 4),
"stage_semantic_score": round(stage_semantic_score, 4),
"goal": goal_s,
} }
) )
succ_variants = pack.get("progression_successor_variants") or {} succ_variants = pack.get("progression_successor_variants") or {}

View File

@ -9,6 +9,7 @@ from __future__ import annotations
import json import json
import logging import logging
import re import re
from dataclasses import dataclass, field
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple
from pydantic import BaseModel, Field, field_validator from pydantic import BaseModel, Field, field_validator
@ -462,7 +463,7 @@ def score_exercise_semantic_relevance(
core_hits = sum(1 for ph in core if _phrase_in_blob(ph, blob)) core_hits = sum(1 for ph in core if _phrase_in_blob(ph, blob))
must_hits = sum(1 for ph in must if _phrase_in_blob(ph, blob)) must_hits = sum(1 for ph in must if _phrase_in_blob(ph, blob))
exclude_hits = sum(1 for ph in exclude if _phrase_in_blob(ph, blob)) exclude_hits = sum(1 for ph in exclude if _phrase_excluded_in_blob(ph, blob))
score = 0.0 score = 0.0
if core: if core:
@ -623,9 +624,82 @@ _STAGE_GOAL_STOPWORDS = _QUERY_STOPWORDS | frozenset(
) )
def _significant_stage_tokens(learning_goal: str) -> List[str]: _STAGE_NEGATION_PATTERNS = (
"""Wörter aus Stufen-Lernziel für Text-Match (ohne Füllwörter).""" r"\bohne\s+([^,.;]+)",
raw = re.findall(r"[a-zäöüß]{4,}", _normalize_phrase(learning_goal), flags=re.IGNORECASE) r"\bkein(?:e|en|er|em)?\s+([^,.;]+)",
r"\bnicht\s+([^,.;]+)",
)
# Aus „ohne Tritttechnik“ etc. — erweiterte Treffer im Übungstext
_STAGE_EXCLUDE_ALIASES: Dict[str, Tuple[str, ...]] = {
"tritttechnik": (
"tritttechnik",
"trittpraezision",
"trittpräzision",
"tritt praesision",
"tritt-präzision",
"kicktechnik",
"tritt ausführung",
"tritt ausfuehrung",
),
"kumite": ("kumite", "partnerkampf", "freikampf", "jiyu kumite"),
"kraftuebung": ("kraftuebung", "kraftübung", "krafttraining", "kraftübungen"),
"anwendung": ("kumite anwendung", "kampfanwendung"),
}
_STAGE_FOCUS_TOKENS = frozenset(
{
"koordination",
"absprung",
"beinhebung",
"landung",
"sprung",
"sprungphase",
"balance",
"gleichgewicht",
"timing",
"vorbereitung",
"athletik",
"mobilitaet",
"mobilität",
"stabilisation",
"stabilisierung",
}
)
@dataclass
class StageGoalConstraints:
    """Positive and negative matching constraints derived from one stage goal.

    Instances are produced by ``parse_stage_goal_constraints``; a default
    instance (all fields empty/False) is returned for goals shorter than three
    characters.
    """

    # Significant goal tokens with negated segments stripped.
    positive_tokens: List[str] = field(default_factory=list)
    # Normalized phrases (plus alias expansions) an exercise must not match;
    # the parser caps this list at 16 entries.
    exclude_phrases: List[str] = field(default_factory=list)
    # True when at least one negation pattern matched inside the goal text.
    has_negation: bool = False
    # True when the goal contains a focus token or a negation.
    strict_positive: bool = False
def _expand_stage_exclude_phrase(phrase: str) -> List[str]:
    """Expand one excluded phrase into the variants to search for.

    The result starts with the normalized phrase, followed by its
    whitespace-free form and the normalized aliases of every
    ``_STAGE_EXCLUDE_ALIASES`` key that contains the phrase or is contained in
    it. Duplicates are dropped, first-seen order is kept, and at most 12
    entries are returned. An empty normalized phrase yields ``[]``.
    """
    base = _normalize_phrase(phrase)
    if not base:
        return []

    variants: List[str] = [base]

    def _remember(candidate: str) -> None:
        # Keep first-seen order; ignore empty strings and repeats.
        if candidate and candidate not in variants:
            variants.append(candidate)

    _remember(base.replace(" ", ""))
    for trigger, synonyms in _STAGE_EXCLUDE_ALIASES.items():
        # Substring match in either direction selects the alias group.
        if trigger not in base and base not in trigger:
            continue
        for synonym in synonyms:
            _remember(_normalize_phrase(synonym))
    return variants[:12]
def _significant_stage_tokens(learning_goal: str, *, strip_negated: bool = True) -> List[str]:
"""Wörter aus Stufen-Lernziel für Text-Match (ohne Füllwörter, ohne Negationssegmente)."""
text = _normalize_phrase(learning_goal)
if strip_negated:
for pat in _STAGE_NEGATION_PATTERNS:
text = re.sub(pat, " ", text)
raw = re.findall(r"[a-zäöüß]{4,}", text, flags=re.IGNORECASE)
out: List[str] = [] out: List[str] = []
for w in raw: for w in raw:
low = w.lower().replace("ä", "ae").replace("ö", "oe").replace("ü", "ue") low = w.lower().replace("ä", "ae").replace("ö", "oe").replace("ü", "ue")
@ -636,19 +710,245 @@ def _significant_stage_tokens(learning_goal: str) -> List[str]:
return out[:10] return out[:10]
def parse_stage_goal_constraints(
    learning_goal: str,
    anti_patterns: Optional[Sequence[str]] = None,
) -> StageGoalConstraints:
    """Derive positive tokens and exclusions from a roadmap stage goal.

    Args:
        learning_goal: Free-text learning goal of the stage. Goals shorter than
            three characters (after stripping) yield an empty constraints
            object, and ``anti_patterns`` are ignored in that case.
        anti_patterns: Optional extra phrases to exclude, expanded in the same
            way as negated segments found in the goal.

    Returns:
        A ``StageGoalConstraints`` whose exclude list is de-duplicated in
        first-seen order and capped at 16 entries.
    """
    goal_text = (learning_goal or "").strip()
    if len(goal_text) < 3:
        return StageGoalConstraints()

    normalized_goal = _normalize_phrase(goal_text)
    collected: List[str] = []
    negation_seen = False

    # Each negation pattern captures the text following the negation word.
    for pattern in _STAGE_NEGATION_PATTERNS:
        for match in re.finditer(pattern, normalized_goal):
            negation_seen = True
            negated = (match.group(1) or "").strip()
            if negated:
                collected.extend(_expand_stage_exclude_phrase(negated))

    for entry in anti_patterns or []:
        cleaned = _normalize_phrase(str(entry or ""))
        if cleaned:
            collected.extend(_expand_stage_exclude_phrase(cleaned))

    tokens = _significant_stage_tokens(goal_text, strip_negated=True)
    has_focus_token = any(tok in _STAGE_FOCUS_TOKENS for tok in tokens)

    # dict.fromkeys keeps the first occurrence of each phrase, in order.
    unique_excludes = list(dict.fromkeys(item for item in collected if item))

    return StageGoalConstraints(
        positive_tokens=tokens,
        exclude_phrases=unique_excludes[:16],
        has_negation=negation_seen,
        strict_positive=has_focus_token or negation_seen,
    )
def _phrase_excluded_in_blob(phrase: str, blob: str) -> bool:
    """Report whether an excluded phrase genuinely occurs in the exercise text.

    A hit only counts when the phrase is found in ``blob`` and no negated
    segment of ``blob`` (text captured by ``_STAGE_NEGATION_PATTERNS``) covers
    it, so text that itself negates the topic is not treated as containing it.
    """
    if not (phrase and blob and _phrase_in_blob(phrase, blob)):
        return False

    needle = _normalize_phrase(phrase)
    for pattern in _STAGE_NEGATION_PATTERNS:
        for match in re.finditer(pattern, blob):
            negated = _normalize_phrase(match.group(1) or "")
            # The phrase is covered when it overlaps the negated segment in
            # either direction or matches inside it.
            if negated and (
                needle in negated
                or negated in needle
                or _phrase_in_blob(needle, negated)
            ):
                return False
    return True
def _blob_matches_stage_excludes(blob: str, exclude_phrases: Sequence[str]) -> bool:
    """Return True if any excluded phrase counts as present in ``blob``."""
    return any(
        _phrase_excluded_in_blob(candidate, blob) for candidate in exclude_phrases
    )
# Default minimum stage score an exercise needs to pass
# exercise_passes_stage_fit (default of its ``min_stage_semantic`` parameter).
_MIN_STAGE_FIT_SEMANTIC = 0.30
# Threshold exercise_passes_stage_fit uses instead when called with relaxed=True.
_MIN_STAGE_FIT_RELAXED = 0.20
def build_stage_match_brief(
    *,
    learning_goal: str,
    anti_patterns: Optional[Sequence[str]] = None,
    success_criteria: Optional[Sequence[str]] = None,
    load_profile: Optional[Sequence[str]] = None,
    phase: Optional[str] = None,
    path_context_note: Optional[str] = None,
) -> PlanningSemanticBrief:
    """Build a stage-centred semantic brief, independent of the overall path topic.

    Intended for roadmap matching, where an exercise's title, summary and goal
    are scored against the stage rather than the global topic.

    Args:
        learning_goal: Stage learning goal. Goals shorter than three characters
            yield a brief with ``semantic_strength=0.0``.
        anti_patterns: Extra phrases to exclude, merged with negations parsed
            from the goal.
        success_criteria: Added to the must-phrases, each clipped to 100 chars.
        load_profile: Added to the must-phrases, each clipped to 60 chars.
        phase: Optional phase name; lower-cased and used as development arc.
        path_context_note: Optional path context appended to the retrieval
            query only (clipped to 200 chars).

    Returns:
        A ``PlanningSemanticBrief`` with an empty ``primary_topic``, at most 12
        must-phrases, at most 12 exclude-phrases and a fixed strength of 0.78.
    """
    lg = (learning_goal or "").strip()
    if len(lg) < 3:
        return PlanningSemanticBrief(semantic_strength=0.0)

    constraints = parse_stage_goal_constraints(lg, anti_patterns)
    norm_lg = _normalize_phrase(lg)

    must: List[str] = []

    def _append_clipped(phrase: str, limit: int) -> None:
        # De-duplicate on the stored (clipped) form as well as the full phrase.
        # Checking only the full phrase let over-long repeats be added twice.
        clipped = phrase[:limit]
        if clipped and phrase not in must and clipped not in must:
            must.append(clipped)

    for token in constraints.positive_tokens:
        if token not in must:
            must.append(token)
    _append_clipped(norm_lg, 120)
    for raw in success_criteria or []:
        _append_clipped(_normalize_phrase(str(raw or "")), 100)
    for raw in load_profile or []:
        _append_clipped(_normalize_phrase(str(raw or "")), 60)

    # The context note widens the retrieval query but is not a must-phrase.
    retrieval_parts = [norm_lg]
    if path_context_note:
        note = _normalize_phrase(path_context_note)[:200]
        if note:
            retrieval_parts.append(note)

    arc: List[str] = []
    ph = (phase or "").strip().lower()
    if ph:
        arc.append(ph)

    return PlanningSemanticBrief(
        primary_topic="",
        topic_type="focus",
        must_phrases=must[:12],
        exclude_phrases=list(constraints.exclude_phrases)[:12],
        development_arc=arc[:4],
        retrieval_query=" ".join(p for p in retrieval_parts if p)[:500],
        semantic_strength=0.78,
        rationale="stage_match_brief",
    )
def score_exercise_stage_fit(
    *,
    title: str,
    summary: str,
    goal: str,
    stage_brief: PlanningSemanticBrief,
    variant_names: Optional[Sequence[str]] = None,
    step_phase: Optional[str] = None,
) -> Tuple[float, List[str]]:
    """Score how well an exercise fits a stage brief.

    Starts from ``score_exercise_semantic_relevance`` and adds a bonus of up to
    0.28, proportional to the share of single-word must-phrases (at least four
    characters, first six only) found in the exercise text.

    Returns:
        ``(score, reasons)`` with the score clamped to ``[0.0, 1.0]`` and
        rounded to four decimals, and at most four reasons.
    """
    names = variant_names or []
    base_score, notes = score_exercise_semantic_relevance(
        title=title,
        summary=summary,
        goal=goal,
        variant_names=names,
        brief=stage_brief,
        step_phase=step_phase,
    )
    text = _blob_from_fields(title, summary, goal, names)

    keywords = [
        phrase
        for phrase in (stage_brief.must_phrases or [])
        if phrase and " " not in phrase and len(phrase) >= 4
    ][:6]
    matched = sum(1 for keyword in keywords if _phrase_in_blob(keyword, text))

    if matched:
        base_score = min(1.0, base_score + 0.28 * (matched / len(keywords)))
        # Announce the stage focus only when at least half the keywords hit.
        if matched >= max(1, len(keywords) // 2):
            notes = ["Stufen-Schwerpunkte im Übungstext", *notes]

    return max(0.0, min(1.0, round(base_score, 4))), notes[:4]
def exercise_passes_stage_fit(
    *,
    learning_goal: str,
    title: str,
    summary: str = "",
    goal: str = "",
    stage_brief: Optional[PlanningSemanticBrief] = None,
    stage_semantic_score: Optional[float] = None,
    anti_patterns: Optional[Sequence[str]] = None,
    step_phase: Optional[str] = None,
    min_stage_semantic: float = _MIN_STAGE_FIT_SEMANTIC,
    relaxed: bool = False,
) -> bool:
    """Gate an exercise against a stage: exclusions first, then a score threshold.

    Args:
        learning_goal: Stage learning goal; goals shorter than three characters
            always pass.
        title, summary, goal: Exercise text fields checked against exclusions
            and, if needed, scored.
        stage_brief: Precomputed stage brief; built from the goal when missing
            and a score has to be computed.
        stage_semantic_score: Precomputed stage score; when given, no scoring
            is performed.
        anti_patterns: Extra phrases that disqualify the exercise.
        step_phase: Forwarded to the scorer.
        min_stage_semantic: Threshold used when ``relaxed`` is False.
        relaxed: Use ``_MIN_STAGE_FIT_RELAXED`` as threshold instead.

    Returns:
        False if an excluded phrase matches the exercise text, otherwise
        whether the stage score reaches the threshold.
    """
    lg = (learning_goal or "").strip()
    if len(lg) < 3:
        return True

    blob = _blob_from_fields(title, summary, goal, [])
    constraints = parse_stage_goal_constraints(lg, anti_patterns)
    if constraints.exclude_phrases and _blob_matches_stage_excludes(
        blob, constraints.exclude_phrases
    ):
        return False

    stage_sem = stage_semantic_score
    if stage_sem is None:
        # Build the fallback brief only when a score must be computed; callers
        # that pass a precomputed score no longer pay for an unused brief.
        brief = stage_brief or build_stage_match_brief(
            learning_goal=lg,
            anti_patterns=anti_patterns,
        )
        stage_sem, _ = score_exercise_stage_fit(
            title=title,
            summary=summary,
            goal=goal,
            stage_brief=brief,
            step_phase=step_phase,
        )

    threshold = _MIN_STAGE_FIT_RELAXED if relaxed else min_stage_semantic
    return float(stage_sem or 0.0) >= threshold
def apply_stage_match_retrieval_weights(brief: PlanningSemanticBrief) -> Dict[str, float]:
    """Return the ranking weights for roadmap stage matching.

    The semantic component dominates (0.58). ``brief`` is accepted but not
    read, so every call returns the same fixed weights in a new dict.
    """
    weights: Dict[str, float] = {
        "semantic": 0.58,
        "fulltext": 0.14,
        "profile": 0.18,
        "progression": 0.04,
        "skill": 0.04,
        "plan": 0.02,
    }
    # Negative entries act as penalties for repeated units / groups.
    weights.update({"repeat_unit": -0.40, "repeat_group": -0.15})
    return weights
def semantic_brief_for_stage( def semantic_brief_for_stage(
brief: PlanningSemanticBrief, brief: PlanningSemanticBrief,
*, *,
learning_goal: str, learning_goal: str,
phase: Optional[str] = None, phase: Optional[str] = None,
anti_patterns: Optional[Sequence[str]] = None,
) -> PlanningSemanticBrief: ) -> PlanningSemanticBrief:
"""Brief um Stufen-Lernziel erweitern — für Roadmap-Match pro Major Step.""" """Legacy: globalen Brief anreichern — bevorzugt build_stage_match_brief für Roadmap-Match."""
lg = _normalize_phrase(learning_goal) lg = _normalize_phrase(learning_goal)
if not lg: if not lg:
return brief return brief
constraints = parse_stage_goal_constraints(learning_goal, anti_patterns)
must = list(brief.must_phrases or []) must = list(brief.must_phrases or [])
for token in constraints.positive_tokens[:4]:
if token not in must:
must.append(token)
if lg not in must: if lg not in must:
must.insert(0, lg[:120]) must.insert(0, lg[:120])
exclude = list(brief.exclude_phrases or [])
for item in constraints.exclude_phrases:
if item not in exclude:
exclude.append(item)
arc = list(brief.development_arc or []) arc = list(brief.development_arc or [])
ph = (phase or "").strip().lower() ph = (phase or "").strip().lower()
if ph and ph not in arc: if ph and ph not in arc:
@ -657,6 +957,7 @@ def semantic_brief_for_stage(
return brief.model_copy( return brief.model_copy(
update={ update={
"must_phrases": must[:12], "must_phrases": must[:12],
"exclude_phrases": exclude[:12],
"development_arc": arc[:8], "development_arc": arc[:8],
"semantic_strength": min(1.0, strength), "semantic_strength": min(1.0, strength),
} }
@ -672,33 +973,24 @@ def exercise_passes_stage_learning_goal_gate(
semantic_score: float = 0.0, semantic_score: float = 0.0,
min_semantic: float = 0.20, min_semantic: float = 0.20,
relaxed: bool = False, relaxed: bool = False,
anti_patterns: Optional[Sequence[str]] = None,
stage_brief: Optional[PlanningSemanticBrief] = None,
stage_semantic_score: Optional[float] = None,
step_phase: Optional[str] = None,
) -> bool: ) -> bool:
"""Roadmap-Stufe: Übung muss zum Stufen-Lernziel passen, nicht nur zum Gesamtthema.""" """Roadmap-Stufe: delegiert an exercise_passes_stage_fit (Titel + Summary + Goal)."""
lg = (learning_goal or "").strip() del semantic_score, min_semantic
if len(lg) < 3: return exercise_passes_stage_fit(
return True learning_goal=learning_goal,
title=title,
blob = _blob_from_fields(title, summary, goal, []) summary=summary,
norm_lg = _normalize_phrase(lg) goal=goal,
if _phrase_in_blob(norm_lg, blob): stage_brief=stage_brief,
return True stage_semantic_score=stage_semantic_score,
anti_patterns=anti_patterns,
tokens = _significant_stage_tokens(lg) step_phase=step_phase,
if not tokens: relaxed=relaxed,
threshold = 0.12 if relaxed else min_semantic )
return semantic_score >= threshold
hits = sum(1 for t in tokens if _phrase_in_blob(t, blob))
if len(tokens) <= 2:
required = 1
else:
required = max(2, (len(tokens) + 1) // 2)
if hits >= required:
return True
threshold = 0.14 if relaxed else min_semantic
return semantic_score >= threshold
def exercise_passes_path_semantic_gate( def exercise_passes_path_semantic_gate(
@ -739,7 +1031,9 @@ def pick_best_path_hit(
*, *,
semantic_brief: Optional[PlanningSemanticBrief] = None, semantic_brief: Optional[PlanningSemanticBrief] = None,
stage_learning_goal: Optional[str] = None, stage_learning_goal: Optional[str] = None,
stage_anti_patterns: Optional[Sequence[str]] = None,
roadmap_stage_match: bool = False, roadmap_stage_match: bool = False,
stage_match_brief: Optional[PlanningSemanticBrief] = None,
) -> Optional[Dict[str, Any]]: ) -> Optional[Dict[str, Any]]:
"""Gestufte Auswahl: strikt → relaxed → optional Notfall-Fallback.""" """Gestufte Auswahl: strikt → relaxed → optional Notfall-Fallback."""
if not hits: if not hits:
@ -747,6 +1041,13 @@ def pick_best_path_hit(
stage_goal = (stage_learning_goal or "").strip() stage_goal = (stage_learning_goal or "").strip()
stage_brief: Optional[PlanningSemanticBrief] = stage_match_brief
if roadmap_stage_match and stage_goal and stage_brief is None:
stage_brief = build_stage_match_brief(
learning_goal=stage_goal,
anti_patterns=stage_anti_patterns,
)
def _scan(*, strict: bool) -> Optional[Dict[str, Any]]: def _scan(*, strict: bool) -> Optional[Dict[str, Any]]:
best: Optional[Dict[str, Any]] = None best: Optional[Dict[str, Any]] = None
best_key: Tuple[float, float] = (-1.0, -1.0) best_key: Tuple[float, float] = (-1.0, -1.0)
@ -754,28 +1055,38 @@ def pick_best_path_hit(
eid = int(hit["id"]) eid = int(hit["id"])
if eid in used_exercise_ids: if eid in used_exercise_ids:
continue continue
sem = float(hit.get("semantic_score") or 0.0)
title = str(hit.get("title") or "") title = str(hit.get("title") or "")
summary = str(hit.get("summary") or "") summary = str(hit.get("summary") or "")
if semantic_brief and not exercise_passes_path_semantic_gate( goal_text = str(hit.get("goal") or hit.get("exercise_goal") or "")
semantic_score=sem, sem = float(hit.get("semantic_score") or 0.0)
title=title, stage_sem = float(hit.get("stage_semantic_score") or sem)
summary=summary,
goal="", if roadmap_stage_match and stage_goal:
brief=semantic_brief, if not exercise_passes_stage_fit(
strict=strict, learning_goal=stage_goal,
): title=title,
continue summary=summary,
if stage_goal and not exercise_passes_stage_learning_goal_gate( goal=goal_text,
learning_goal=stage_goal, stage_brief=stage_brief,
title=title, stage_semantic_score=stage_sem,
summary=summary, anti_patterns=stage_anti_patterns,
semantic_score=sem, relaxed=not strict,
relaxed=not strict, ):
): continue
continue else:
if semantic_brief and not exercise_passes_path_semantic_gate(
semantic_score=sem,
title=title,
summary=summary,
goal=goal_text,
brief=semantic_brief,
strict=strict,
):
continue
score = float(hit.get("score") or 0.0) score = float(hit.get("score") or 0.0)
key = (sem, score) rank_sem = stage_sem if roadmap_stage_match and stage_goal else sem
key = (rank_sem, score)
if key > best_key: if key > best_key:
best_key = key best_key = key
best = hit best = hit
@ -820,9 +1131,15 @@ __all__ = [
"build_semantic_brief", "build_semantic_brief",
"enrich_target_with_semantic_expectations", "enrich_target_with_semantic_expectations",
"exercise_passes_path_semantic_gate", "exercise_passes_path_semantic_gate",
"StageGoalConstraints",
"apply_stage_match_retrieval_weights",
"build_stage_match_brief",
"exercise_passes_stage_fit",
"exercise_passes_stage_learning_goal_gate", "exercise_passes_stage_learning_goal_gate",
"merge_semantic_brief_llm", "merge_semantic_brief_llm",
"parse_stage_goal_constraints",
"pick_best_path_hit", "pick_best_path_hit",
"score_exercise_stage_fit",
"semantic_brief_for_stage", "semantic_brief_for_stage",
"resolve_semantic_skill_weights", "resolve_semantic_skill_weights",
"score_exercise_semantic_relevance", "score_exercise_semantic_relevance",

View File

@ -1,7 +1,9 @@
"""Tests Roadmap-Stufen-Match — Gate gegen themenfremde Übungen.""" """Tests Roadmap-Stufen-Match — Gate gegen themenfremde Übungen."""
from planning_exercise_semantics import ( from planning_exercise_semantics import (
build_stage_match_brief,
exercise_passes_stage_learning_goal_gate, exercise_passes_stage_learning_goal_gate,
pick_best_path_hit, pick_best_path_hit,
score_exercise_stage_fit,
semantic_brief_for_stage, semantic_brief_for_stage,
build_semantic_brief, build_semantic_brief,
) )
@ -35,10 +37,37 @@ def test_semantic_brief_for_stage_adds_learning_goal():
assert "hüftmobilität und kammerhaltung" in stage.must_phrases[0] assert "hüftmobilität und kammerhaltung" in stage.must_phrases[0]
def test_build_stage_match_brief_uses_stage_tokens_not_global_topic():
    """A stage brief carries tokens from the stage goal and no primary topic.

    The joined ``must_phrases`` have to mention "absprung" (taken from the
    learning goal), must not mention "mawashi", and ``primary_topic`` has to
    be empty or whitespace-only.
    """
    stage_brief = build_stage_match_brief(
        learning_goal="Koordination von Absprung und Beinhebung ohne Tritttechnik",
        phase="vertiefung",
    )
    # ``must_phrases`` may be None, hence the ``or []`` guard before joining.
    joined_must = " ".join(stage_brief.must_phrases or []).lower()
    assert "mawashi" not in joined_must
    assert "absprung" in joined_must
    assert not (stage_brief.primary_topic or "").strip()
def test_stage_fit_prefers_goal_over_misleading_title():
    """The coordination drill must score higher than the kick-precision drill.

    Both candidates are scored against the same stage brief; only the first
    element of the tuple returned by ``score_exercise_stage_fit`` is compared.
    """
    learning_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
    brief = build_stage_match_brief(learning_goal=learning_goal)
    # Insertion order matters: the kick exercise is scored first, exactly as
    # in the straight-line version of this test.
    candidates = {
        "kick": {
            "title": "Mawashi Geri Trittpräzision",
            "summary": "Kicktechnik",
            "goal": "Präzision im Tritt und Hüftarbeit",
        },
        "coord": {
            "title": "Allgemeines Sprungtraining",
            "summary": "Athletik",
            "goal": "Absprung, Beinhebung und Landung koordinieren — ohne Trittausführung",
        },
    }
    fit_scores = {}
    for label, fields in candidates.items():
        fit, _ = score_exercise_stage_fit(stage_brief=brief, **fields)
        fit_scores[label] = fit
    assert fit_scores["coord"] > fit_scores["kick"]
def test_pick_best_path_hit_roadmap_stage_no_weak_fallback(): def test_pick_best_path_hit_roadmap_stage_no_weak_fallback():
brief = build_semantic_brief("Mae Geri Perfektion") stage_brief = build_stage_match_brief(
stage_brief = semantic_brief_for_stage(
brief,
learning_goal="Hüftmobilität für Mae Geri", learning_goal="Hüftmobilität für Mae Geri",
phase="grundlage", phase="grundlage",
) )
@ -69,9 +98,7 @@ def test_pick_best_path_hit_roadmap_stage_no_weak_fallback():
def test_pick_best_path_hit_roadmap_stage_picks_relevant(): def test_pick_best_path_hit_roadmap_stage_picks_relevant():
brief = build_semantic_brief("Mae Geri Perfektion") stage_brief = build_stage_match_brief(
stage_brief = semantic_brief_for_stage(
brief,
learning_goal="Hüftmobilität für Mae Geri", learning_goal="Hüftmobilität für Mae Geri",
phase="grundlage", phase="grundlage",
) )
@ -94,3 +121,63 @@ def test_pick_best_path_hit_roadmap_stage_picks_relevant():
) )
assert chosen is not None assert chosen is not None
assert int(chosen["id"]) == 2 assert int(chosen["id"]) == 2
def test_stage_gate_rejects_tritt_when_goal_says_ohne_tritttechnik():
    """Regression: jumping Mawashi — coordination slot without kick technique.

    The exercise below is kick-focused, and the stage goal says
    "ohne Tritttechnik" (without kick technique). The gate must reject it
    despite its semantic score of 0.72.
    """
    gate_passed = exercise_passes_stage_learning_goal_gate(
        learning_goal="Koordination von Absprung und Beinhebung ohne Tritttechnik",
        title="Verbesserung der Trittpräzision des Mawashi Geri und der Hüftbewegung",
        summary="Präzision und Hüftarbeit im Stand",
        semantic_score=0.72,
    )
    assert not gate_passed
def test_stage_gate_accepts_absprung_drill_not_kick_focus():
    """A take-off / leg-lift drill passes the gate at a semantic score of 0.35.

    The title repeats the stage goal's key terms ("Absprung", "Beinhebung")
    and the summary says there is no kick execution.
    """
    gate_passed = exercise_passes_stage_learning_goal_gate(
        learning_goal="Koordination von Absprung und Beinhebung ohne Tritttechnik",
        title="Sprungkoordination — Absprung und Beinhebung",
        summary="Ohne Trittausführung, Fokus Gleichgewicht und Timing",
        semantic_score=0.35,
    )
    assert gate_passed
def test_pick_best_rejects_mawashi_tritt_precision_for_coordination_slot():
    """``pick_best_path_hit`` must pick hit 100 over the higher-scored hit 99.

    Hit 99 (kick precision) has the higher ``score`` and ``semantic_score``,
    but the stage goal excludes kick technique. With
    ``roadmap_stage_match=True`` the coordination drill (hit 100) has to be
    chosen.
    """
    learning_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik"
    brief = build_stage_match_brief(learning_goal=learning_goal, phase="vertiefung")
    kick_precision_hit = {
        "id": 99,
        "title": "Verbesserung der Trittpräzision des Mawashi Geri und der Hüftbewegung",
        "summary": "Tritttechnik und Hüfte im Stand",
        "score": 0.91,
        "semantic_score": 0.68,
    }
    coordination_hit = {
        "id": 100,
        "title": "Absprung und Beinhebung — Koordination ohne Kick",
        "summary": "Sprungvorbereitung, kein Tritt",
        "score": 0.62,
        "semantic_score": 0.41,
    }
    # Empty set: no exercise ids have been used yet.
    picked = pick_best_path_hit(
        [kick_precision_hit, coordination_hit],
        set(),
        semantic_brief=brief,
        stage_learning_goal=learning_goal,
        roadmap_stage_match=True,
    )
    assert picked is not None
    assert int(picked["id"]) == 100
def test_parse_stage_goal_constraints_extracts_ohne_tritttechnik():
    """Parsing the goal yields a negation, "absprung", and a "tritt" exclusion.

    Checks three things on the parsed result: ``has_negation`` is truthy,
    "absprung" is among ``positive_tokens``, and at least one entry of
    ``exclude_phrases`` contains "tritt".
    """
    from planning_exercise_semantics import parse_stage_goal_constraints

    constraints = parse_stage_goal_constraints(
        "Koordination von Absprung und Beinhebung ohne Tritttechnik"
    )
    assert constraints.has_negation
    assert "absprung" in constraints.positive_tokens
    assert any("tritt" in phrase for phrase in constraints.exclude_phrases)

View File

@ -1,6 +1,6 @@
# Shinkan Jinkendo Version Information # Shinkan Jinkendo Version Information
APP_VERSION = "0.8.218" APP_VERSION = "0.8.220"
BUILD_DATE = "2026-06-07" BUILD_DATE = "2026-06-07"
DB_SCHEMA_VERSION = "20260607088" DB_SCHEMA_VERSION = "20260607088"
@ -53,6 +53,22 @@ MODULE_VERSIONS = {
} }
CHANGELOG = [ CHANGELOG = [
{
"version": "0.8.220",
"date": "2026-06-07",
"changes": [
"Roadmap-Stufen-Match: build_stage_match_brief + stage_semantic_score über Titel, Summary und Goal.",
"Retriever lädt Übungsziele immer bei Stufen-Match; Ranking nach Stufen-Fit statt Gesamtthema.",
],
},
{
"version": "0.8.219",
"date": "2026-06-07",
"changes": [
"Roadmap-Stufen-Gate: Negationen (ohne Tritttechnik) + Pflicht-Treffer Absprung/Beinhebung.",
"anti_patterns in Stufen-Match; Gesamt-Thema allein reicht bei strict_positive nicht mehr.",
],
},
{ {
"version": "0.8.218", "version": "0.8.218",
"date": "2026-06-07", "date": "2026-06-07",