From 07e147bc767c751060fbede402446580da96c91d Mon Sep 17 00:00:00 2001 From: Lars Date: Wed, 10 Jun 2026 17:02:21 +0200 Subject: [PATCH] Enhance Stage Matching and Retrieval Logic in Planning Exercise - Introduced `build_stage_match_brief` to create stage-specific semantic briefs, improving roadmap matching accuracy. - Updated path retrieval logic to differentiate between general and stage-specific semantic weights, enhancing exercise relevance. - Added support for anti-patterns and success criteria in stage matching, allowing for more nuanced exercise selection. - Enhanced tests to validate new stage matching features and ensure correct functionality against learning goals. - Incremented application version to reflect these updates. --- backend/planning_exercise_path_builder.py | 51 ++- backend/planning_exercise_path_qa.py | 2 + backend/planning_exercise_retrieval.py | 85 +++- backend/planning_exercise_semantics.py | 417 +++++++++++++++--- .../test_planning_roadmap_stage_match.py | 99 ++++- backend/version.py | 18 +- 6 files changed, 591 insertions(+), 81 deletions(-) diff --git a/backend/planning_exercise_path_builder.py b/backend/planning_exercise_path_builder.py index 7dc0b4c..473c228 100644 --- a/backend/planning_exercise_path_builder.py +++ b/backend/planning_exercise_path_builder.py @@ -32,13 +32,14 @@ from planning_exercise_retrieval import run_multistage_planning_retrieval from planning_exercise_semantics import ( PlanningSemanticBrief, apply_path_retrieval_weights, + apply_stage_match_retrieval_weights, brief_to_summary_dict, build_semantic_brief, + build_stage_match_brief, enrich_target_with_semantic_expectations, exercise_passes_path_semantic_gate, pick_best_path_hit, resolve_semantic_skill_weights, - semantic_brief_for_stage, step_phase_for_index, step_retrieval_query, try_enrich_semantic_brief_with_llm, @@ -185,14 +186,18 @@ def _pick_best_path_hit( *, semantic_brief: Optional[PlanningSemanticBrief] = None, stage_learning_goal: Optional[str] = None, + stage_anti_patterns: Optional[List[str]] = None, roadmap_stage_match: bool = False, + stage_match_brief: Optional[PlanningSemanticBrief] = None, ) -> Optional[Dict[str, Any]]: return pick_best_path_hit( hits, used_exercise_ids, semantic_brief=semantic_brief, stage_learning_goal=stage_learning_goal, + stage_anti_patterns=stage_anti_patterns, roadmap_stage_match=roadmap_stage_match, + stage_match_brief=stage_match_brief, ) @@ -292,6 +297,11 @@ def _run_path_step_retrieval( step_phase_override: Optional[str] = None, step_target_profile_override: Optional[PlanningTargetProfile] = None, stage_learning_goal: Optional[str] = None, + stage_anti_patterns: Optional[List[str]] = None, + stage_match_brief: Optional[PlanningSemanticBrief] = None, + stage_success_criteria: Optional[List[str]] = None, + stage_load_profile: Optional[List[str]] = None, + path_context_note: Optional[str] = None, ) -> Tuple[List[Dict[str, Any]], PlanningTargetProfile, Dict[str, Any], str]: step_query = step_query_override or step_retrieval_query( semantic_brief, goal_query, step_index, max_steps @@ -328,7 +338,12 @@ def _run_path_step_retrieval( "path_step_phase": step_phase_override or step_phase_for_index(semantic_brief, step_index, max_steps), "stage_learning_goal": (stage_learning_goal or "").strip() or None, + "stage_anti_patterns": list(stage_anti_patterns or []), "roadmap_stage_match": bool((stage_learning_goal or "").strip()), + "stage_match_brief": stage_match_brief, + "stage_success_criteria": list(stage_success_criteria or []), + "stage_load_profile": list(stage_load_profile or []), + "path_context_note": (path_context_note or "").strip() or None, } pack = apply_progression_context_to_pack( cur, @@ -383,7 +398,10 @@ def _run_path_step_retrieval( has_planning_reference=has_plan_ref, ) - weights = apply_path_retrieval_weights(semantic_brief) + if pack.get("roadmap_stage_match"): + weights = apply_stage_match_retrieval_weights(semantic_brief) + else: + weights = apply_path_retrieval_weights(semantic_brief) profile_id = tenant.profile_id role = tenant.global_role @@ -490,6 +508,8 @@ def _annotate_roadmap_step( step["roadmap_major_step_index"] = stage_spec.major_step_index step["roadmap_phase"] = major_step.phase if major_step else None step["roadmap_learning_goal"] = learning_goal or None + if stage_spec.anti_patterns: + step["roadmap_anti_patterns"] = list(stage_spec.anti_patterns) step["roadmap_match_source"] = "stage_spec" if skill_expectations: step["skill_expectations"] = skill_expectations @@ -569,10 +589,22 @@ def _build_steps_roadmap_first( ) step_kind = resolve_step_exercise_kind_filter(stage_spec, body.exercise_kind_any) stage_goal = (stage_spec.learning_goal or "").strip() - stage_brief = semantic_brief_for_stage( - semantic_brief, + stage_anti = list(stage_spec.anti_patterns or []) + path_context_note = None + if rs_dump: + ctx_parts = [ + str(rs_dump.get("start_situation") or "").strip()[:120], + str(rs_dump.get("target_state") or "").strip()[:120], + str(rs_dump.get("roadmap_notes") or "").strip()[:120], + ] + path_context_note = " ".join(p for p in ctx_parts if p)[:240] or None + stage_match_brief = build_stage_match_brief( learning_goal=stage_goal, + anti_patterns=stage_anti, + success_criteria=list(stage_spec.success_criteria or []), + load_profile=list(stage_spec.load_profile or []), phase=major.phase if major else None, + path_context_note=path_context_note, ) hits, _, _, _ = _run_path_step_retrieval( @@ -587,21 +619,28 @@ def _build_steps_roadmap_first( progression_graph_id=body.progression_graph_id, include_llm_intent=body.include_llm_intent and step_index == 0, exercise_kind_any=step_kind, - semantic_brief=stage_brief, + semantic_brief=stage_match_brief, path_target_profile=path_target_profile, path_intent=path_intent, step_query_override=step_query, step_phase_override=major.phase if major else None, step_target_profile_override=step_target, stage_learning_goal=stage_goal or None, + stage_anti_patterns=stage_anti or None, + stage_match_brief=stage_match_brief, + stage_success_criteria=list(stage_spec.success_criteria or []), + stage_load_profile=list(stage_spec.load_profile or []), + path_context_note=path_context_note, ) hit = _pick_best_path_hit( hits, used, - semantic_brief=stage_brief, + semantic_brief=stage_match_brief, stage_learning_goal=stage_goal or None, + stage_anti_patterns=stage_anti or None, roadmap_stage_match=True, + stage_match_brief=stage_match_brief, ) if not hit: diff --git a/backend/planning_exercise_path_qa.py b/backend/planning_exercise_path_qa.py index 2fdf291..d8880bc 100644 --- a/backend/planning_exercise_path_qa.py +++ b/backend/planning_exercise_path_qa.py @@ -426,12 +426,14 @@ def detect_off_topic_steps( brief=step_brief, step_phase=phase, ) + stage_anti = list(step.get("roadmap_anti_patterns") or []) if stage_goal and not exercise_passes_stage_learning_goal_gate( learning_goal=stage_goal, title=bundle["title"], summary=bundle["summary"], goal=bundle["goal"], semantic_score=sem, + anti_patterns=stage_anti or None, ): off_topic.append( { diff --git a/backend/planning_exercise_retrieval.py b/backend/planning_exercise_retrieval.py index 42af3e8..3051518 100644 --- a/backend/planning_exercise_retrieval.py +++ b/backend/planning_exercise_retrieval.py @@ -14,11 +14,14 @@ from planning_exercise_profiles import ( load_exercise_match_profiles_bulk, score_exercise_against_target, ) +from exercise_ai import strip_html_to_plain from planning_exercise_semantics import ( PlanningSemanticBrief, + build_stage_match_brief, exercise_passes_path_semantic_gate, - exercise_passes_stage_learning_goal_gate, + exercise_passes_stage_fit, score_exercise_semantic_relevance, + score_exercise_stage_fit, ) _MAX_LIBRARY_ROWS = 8000 @@ -149,7 +152,7 @@ def _load_exercise_goals_chunked(cur, exercise_ids: Sequence[int], *, batch: int ph = ",".join(["%s"] * len(chunk)) cur.execute(f"SELECT id, goal FROM exercises WHERE id IN ({ph})", chunk) for row in cur.fetchall(): - out[int(row["id"])] = str(row.get("goal") or "") + out[int(row["id"])] = strip_html_to_plain(row.get("goal"), max_len=1200) return out @@ -203,6 +206,19 @@ def rank_visible_library_hits( path_mode = pack.get("context_mode") == "progression_path" stage_learning_goal = (pack.get("stage_learning_goal") or "").strip() roadmap_stage_match = bool(pack.get("roadmap_stage_match")) + stage_match_brief_raw = pack.get("stage_match_brief") + stage_match_brief: Optional[PlanningSemanticBrief] = None + if isinstance(stage_match_brief_raw, PlanningSemanticBrief): + stage_match_brief = stage_match_brief_raw + elif roadmap_stage_match and stage_learning_goal: + stage_match_brief = build_stage_match_brief( + learning_goal=stage_learning_goal, + anti_patterns=pack.get("stage_anti_patterns"), + success_criteria=pack.get("stage_success_criteria"), + load_profile=pack.get("stage_load_profile"), + phase=step_phase, + path_context_note=pack.get("path_context_note"), + ) last_planned_skills: Set[int] = set() planned_ids = pack.get("planned_exercise_ids") or [] @@ -229,7 +245,11 @@ def rank_visible_library_hits( skills_by_ex = _load_skill_sets_chunked(cur, cand_ids) goals_by_ex: Dict[int, str] = {} variants_by_ex: Dict[int, List[str]] = {} - if semantic_brief and semantic_brief.semantic_strength > 0.05: + need_exercise_semantic_text = ( + (semantic_brief and semantic_brief.semantic_strength > 0.05) + or (stage_match_brief and stage_match_brief.semantic_strength > 0.05) + ) + if need_exercise_semantic_text: goals_by_ex = _load_exercise_goals_chunked(cur, cand_ids) variants_by_ex = _load_variant_names_chunked(cur, cand_ids) @@ -270,52 +290,75 @@ def rank_visible_library_hits( emp, target, intent=intent ) + title_s = str(row.get("title") or "") + summary_s = str(row.get("summary") or "") + goal_s = goals_by_ex.get(eid, "") + semantic_score = 0.0 semantic_reasons: List[str] = [] if semantic_brief and semantic_brief.semantic_strength > 0.05: semantic_score, semantic_reasons = score_exercise_semantic_relevance( - title=str(row.get("title") or ""), - summary=str(row.get("summary") or ""), - goal=goals_by_ex.get(eid, ""), + title=title_s, + summary=summary_s, + goal=goal_s, variant_names=variants_by_ex.get(eid, []), brief=semantic_brief, step_phase=step_phase, ) + stage_semantic_score = 0.0 + stage_semantic_reasons: List[str] = [] + if stage_match_brief and stage_match_brief.semantic_strength > 0.05: + stage_semantic_score, stage_semantic_reasons = score_exercise_stage_fit( + title=title_s, + summary=summary_s, + goal=goal_s, + variant_names=variants_by_ex.get(eid, []), + stage_brief=stage_match_brief, + step_phase=step_phase, + ) + + effective_semantic = ( + stage_semantic_score + if roadmap_stage_match and stage_match_brief + else semantic_score + ) + score_penalty = 0.0 stage_match_reason: Optional[str] = None if ( path_mode + and not roadmap_stage_match and semantic_brief and semantic_brief.semantic_strength >= 0.55 and not exercise_passes_path_semantic_gate( semantic_score=semantic_score, - title=str(row.get("title") or ""), - summary=str(row.get("summary") or ""), - goal=goals_by_ex.get(eid, ""), + title=title_s, + summary=summary_s, + goal=goal_s, brief=semantic_brief, strict=True, ) ): score_penalty = 0.42 if roadmap_stage_match and stage_learning_goal: - title_s = str(row.get("title") or "") - summary_s = str(row.get("summary") or "") - goal_s = goals_by_ex.get(eid, "") - if exercise_passes_stage_learning_goal_gate( + if exercise_passes_stage_fit( learning_goal=stage_learning_goal, title=title_s, summary=summary_s, goal=goal_s, - semantic_score=semantic_score, + stage_brief=stage_match_brief, + stage_semantic_score=stage_semantic_score, + anti_patterns=pack.get("stage_anti_patterns"), + step_phase=step_phase, ): - score_penalty = max(0.0, score_penalty - 0.08) + score_penalty = max(0.0, score_penalty - 0.10) stage_match_reason = "Passt zum Stufen-Lernziel" else: - score_penalty += 0.35 + score_penalty += 0.48 score = ( - weights.get("semantic", 0.0) * semantic_score + weights.get("semantic", 0.0) * effective_semantic + weights["fulltext"] * ft_norm + weights["progression"] * prog_hit + weights["skill"] * skill_sim @@ -329,7 +372,11 @@ def rank_visible_library_hits( reasons: List[str] = [] if stage_match_reason: reasons.append(stage_match_reason) - if semantic_score >= 0.35 and semantic_reasons: + if roadmap_stage_match and stage_semantic_score >= 0.30 and stage_semantic_reasons: + for sr in stage_semantic_reasons: + if sr not in reasons: + reasons.append(sr) + elif semantic_score >= 0.35 and semantic_reasons: for sr in semantic_reasons: if sr not in reasons: reasons.append(sr) @@ -365,6 +412,8 @@ def rank_visible_library_hits( "score": round(max(0.0, min(1.0, score)), 4), "reasons": reasons, "semantic_score": round(semantic_score, 4), + "stage_semantic_score": round(stage_semantic_score, 4), + "goal": goal_s, } ) succ_variants = pack.get("progression_successor_variants") or {} diff --git a/backend/planning_exercise_semantics.py b/backend/planning_exercise_semantics.py index e91a6a0..45a1adc 100644 --- a/backend/planning_exercise_semantics.py +++ b/backend/planning_exercise_semantics.py @@ -9,6 +9,7 @@ from __future__ import annotations import json import logging import re +from dataclasses import dataclass, field from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple from pydantic import BaseModel, Field, field_validator @@ -462,7 +463,7 @@ def score_exercise_semantic_relevance( core_hits = sum(1 for ph in core if _phrase_in_blob(ph, blob)) must_hits = sum(1 for ph in must if _phrase_in_blob(ph, blob)) - exclude_hits = sum(1 for ph in exclude if _phrase_in_blob(ph, blob)) + exclude_hits = sum(1 for ph in exclude if _phrase_excluded_in_blob(ph, blob)) score = 0.0 if core: @@ -623,9 +624,82 @@ _STAGE_GOAL_STOPWORDS = _QUERY_STOPWORDS | frozenset( ) -def _significant_stage_tokens(learning_goal: str) -> List[str]: - """Wörter aus Stufen-Lernziel für Text-Match (ohne Füllwörter).""" - raw = re.findall(r"[a-zäöüß]{4,}", _normalize_phrase(learning_goal), flags=re.IGNORECASE) +_STAGE_NEGATION_PATTERNS = ( + r"\bohne\s+([^,.;]+)", + r"\bkein(?:e|en|er|em)?\s+([^,.;]+)", + r"\bnicht\s+([^,.;]+)", +) + +# Aus „ohne Tritttechnik“ etc. — erweiterte Treffer im Übungstext +_STAGE_EXCLUDE_ALIASES: Dict[str, Tuple[str, ...]] = { + "tritttechnik": ( + "tritttechnik", + "trittpraezision", + "trittpräzision", + "tritt praesision", + "tritt-präzision", + "kicktechnik", + "tritt ausführung", + "tritt ausfuehrung", + ), + "kumite": ("kumite", "partnerkampf", "freikampf", "jiyu kumite"), + "kraftuebung": ("kraftuebung", "kraftübung", "krafttraining", "kraftübungen"), + "anwendung": ("kumite anwendung", "kampfanwendung"), +} + +_STAGE_FOCUS_TOKENS = frozenset( + { + "koordination", + "absprung", + "beinhebung", + "landung", + "sprung", + "sprungphase", + "balance", + "gleichgewicht", + "timing", + "vorbereitung", + "athletik", + "mobilitaet", + "mobilität", + "stabilisation", + "stabilisierung", + } +) + + +@dataclass +class StageGoalConstraints: + positive_tokens: List[str] = field(default_factory=list) + exclude_phrases: List[str] = field(default_factory=list) + has_negation: bool = False + strict_positive: bool = False + + +def _expand_stage_exclude_phrase(phrase: str) -> List[str]: + norm = _normalize_phrase(phrase) + if not norm: + return [] + out: List[str] = [norm] + compact = norm.replace(" ", "") + if compact and compact not in out: + out.append(compact) + for key, aliases in _STAGE_EXCLUDE_ALIASES.items(): + if key in norm or norm in key: + for alias in aliases: + a = _normalize_phrase(alias) + if a and a not in out: + out.append(a) + return out[:12] + + +def _significant_stage_tokens(learning_goal: str, *, strip_negated: bool = True) -> List[str]: + """Wörter aus Stufen-Lernziel für Text-Match (ohne Füllwörter, ohne Negationssegmente).""" + text = _normalize_phrase(learning_goal) + if strip_negated: + for pat in _STAGE_NEGATION_PATTERNS: + text = re.sub(pat, " ", text) + raw = re.findall(r"[a-zäöüß]{4,}", text, flags=re.IGNORECASE) out: List[str] = [] for w in raw: low = w.lower().replace("ä", "ae").replace("ö", "oe").replace("ü", "ue") @@ -636,19 +710,245 @@ def _significant_stage_tokens(learning_goal: str) -> List[str]: return out[:10] +def parse_stage_goal_constraints( + learning_goal: str, + anti_patterns: Optional[Sequence[str]] = None, +) -> StageGoalConstraints: + """Positiv/Negativ aus Stufen-Lernziel + anti_patterns (Roadmap-Stufe).""" + lg = (learning_goal or "").strip() + if len(lg) < 3: + return StageGoalConstraints() + + norm = _normalize_phrase(lg) + exclude: List[str] = [] + has_negation = False + for pat in _STAGE_NEGATION_PATTERNS: + for m in re.finditer(pat, norm): + has_negation = True + chunk = (m.group(1) or "").strip() + if chunk: + exclude.extend(_expand_stage_exclude_phrase(chunk)) + + for raw in anti_patterns or []: + s = _normalize_phrase(str(raw or "")) + if s: + exclude.extend(_expand_stage_exclude_phrase(s)) + + positive = _significant_stage_tokens(lg, strip_negated=True) + focus_hits = [t for t in positive if t in _STAGE_FOCUS_TOKENS] + strict_positive = bool(focus_hits) or has_negation + + dedup_exclude: List[str] = [] + for item in exclude: + if item and item not in dedup_exclude: + dedup_exclude.append(item) + + return StageGoalConstraints( + positive_tokens=positive, + exclude_phrases=dedup_exclude[:16], + has_negation=has_negation, + strict_positive=strict_positive, + ) + + +def _phrase_excluded_in_blob(phrase: str, blob: str) -> bool: + """Treffer nur wenn das Ausschluss-Thema nicht selbst negiert beschrieben ist.""" + if not phrase or not blob: + return False + if not _phrase_in_blob(phrase, blob): + return False + norm = _normalize_phrase(phrase) + for pat in _STAGE_NEGATION_PATTERNS: + for m in re.finditer(pat, blob): + chunk = _normalize_phrase(m.group(1) or "") + if not chunk: + continue + if norm in chunk or chunk in norm or _phrase_in_blob(norm, chunk): + return False + return True + + +def _blob_matches_stage_excludes(blob: str, exclude_phrases: Sequence[str]) -> bool: + for phrase in exclude_phrases: + if _phrase_excluded_in_blob(phrase, blob): + return True + return False + + +_MIN_STAGE_FIT_SEMANTIC = 0.30 +_MIN_STAGE_FIT_RELAXED = 0.20 + + +def build_stage_match_brief( + *, + learning_goal: str, + anti_patterns: Optional[Sequence[str]] = None, + success_criteria: Optional[Sequence[str]] = None, + load_profile: Optional[Sequence[str]] = None, + phase: Optional[str] = None, + path_context_note: Optional[str] = None, +) -> PlanningSemanticBrief: + """ + Stufen-zentrierter Semantik-Brief — unabhängig vom Gesamt-Pfad-Thema. + + Primär für Roadmap-Match: Bewertung gegen Titel + Kurzbeschreibung + Übungsziel. + """ + lg = (learning_goal or "").strip() + if len(lg) < 3: + return PlanningSemanticBrief(semantic_strength=0.0) + + constraints = parse_stage_goal_constraints(lg, anti_patterns) + must: List[str] = [] + norm_lg = _normalize_phrase(lg) + for token in constraints.positive_tokens: + if token not in must: + must.append(token) + if norm_lg and norm_lg not in must: + must.append(norm_lg[:120]) + for raw in success_criteria or []: + s = _normalize_phrase(str(raw or "")) + if s and s not in must: + must.append(s[:100]) + for raw in load_profile or []: + s = _normalize_phrase(str(raw or "")) + if s and s not in must: + must.append(s[:60]) + + retrieval_parts = [norm_lg] + if path_context_note: + note = _normalize_phrase(path_context_note)[:200] + if note: + retrieval_parts.append(note) + + arc: List[str] = [] + ph = (phase or "").strip().lower() + if ph: + arc.append(ph) + + return PlanningSemanticBrief( + primary_topic="", + topic_type="focus", + must_phrases=must[:12], + exclude_phrases=list(constraints.exclude_phrases)[:12], + development_arc=arc[:4], + retrieval_query=" ".join(p for p in retrieval_parts if p)[:500], + semantic_strength=0.78, + rationale="stage_match_brief", + ) + + +def score_exercise_stage_fit( + *, + title: str, + summary: str, + goal: str, + stage_brief: PlanningSemanticBrief, + variant_names: Optional[Sequence[str]] = None, + step_phase: Optional[str] = None, +) -> Tuple[float, List[str]]: + """Semantik-Score Übung ↔ Stufen-Lernziel (Titel + Summary + Goal).""" + score, reasons = score_exercise_semantic_relevance( + title=title, + summary=summary, + goal=goal, + variant_names=variant_names or [], + brief=stage_brief, + step_phase=step_phase, + ) + blob = _blob_from_fields(title, summary, goal, variant_names or []) + focus_tokens = [ + t + for t in (stage_brief.must_phrases or []) + if t and " " not in t and len(t) >= 4 + ][:6] + if focus_tokens: + hits = sum(1 for t in focus_tokens if _phrase_in_blob(t, blob)) + ratio = hits / len(focus_tokens) + bonus = 0.28 * ratio + if bonus > 0: + score = min(1.0, score + bonus) + if hits >= max(1, len(focus_tokens) // 2): + reasons = ["Stufen-Schwerpunkte im Übungstext", *reasons] + return max(0.0, min(1.0, round(score, 4))), reasons[:4] + + +def exercise_passes_stage_fit( + *, + learning_goal: str, + title: str, + summary: str = "", + goal: str = "", + stage_brief: Optional[PlanningSemanticBrief] = None, + stage_semantic_score: Optional[float] = None, + anti_patterns: Optional[Sequence[str]] = None, + step_phase: Optional[str] = None, + min_stage_semantic: float = _MIN_STAGE_FIT_SEMANTIC, + relaxed: bool = False, +) -> bool: + """Allgemeines Stufen-Fit-Gate: voller Übungstext vs. Stufen-Brief.""" + lg = (learning_goal or "").strip() + if len(lg) < 3: + return True + + blob = _blob_from_fields(title, summary, goal, []) + constraints = parse_stage_goal_constraints(lg, anti_patterns) + if constraints.exclude_phrases and _blob_matches_stage_excludes(blob, constraints.exclude_phrases): + return False + + brief = stage_brief or build_stage_match_brief( + learning_goal=lg, + anti_patterns=anti_patterns, + ) + stage_sem = stage_semantic_score + if stage_sem is None: + stage_sem, _ = score_exercise_stage_fit( + title=title, + summary=summary, + goal=goal, + stage_brief=brief, + step_phase=step_phase, + ) + + threshold = _MIN_STAGE_FIT_RELAXED if relaxed else min_stage_semantic + return float(stage_sem or 0.0) >= threshold + + +def apply_stage_match_retrieval_weights(brief: PlanningSemanticBrief) -> Dict[str, float]: + """Roadmap-Stufe: Stufen-Semantik (Ziel/Summary/Goal) dominiert.""" + return { + "semantic": 0.58, + "fulltext": 0.14, + "profile": 0.18, + "progression": 0.04, + "skill": 0.04, + "plan": 0.02, + "repeat_unit": -0.40, + "repeat_group": -0.15, + } + + def semantic_brief_for_stage( brief: PlanningSemanticBrief, *, learning_goal: str, phase: Optional[str] = None, + anti_patterns: Optional[Sequence[str]] = None, ) -> PlanningSemanticBrief: - """Brief um Stufen-Lernziel erweitern — für Roadmap-Match pro Major Step.""" + """Legacy: globalen Brief anreichern — bevorzugt build_stage_match_brief für Roadmap-Match.""" lg = _normalize_phrase(learning_goal) if not lg: return brief + constraints = parse_stage_goal_constraints(learning_goal, anti_patterns) must = list(brief.must_phrases or []) + for token in constraints.positive_tokens[:4]: + if token not in must: + must.append(token) if lg not in must: must.insert(0, lg[:120]) + exclude = list(brief.exclude_phrases or []) + for item in constraints.exclude_phrases: + if item not in exclude: + exclude.append(item) arc = list(brief.development_arc or []) ph = (phase or "").strip().lower() if ph and ph not in arc: @@ -657,6 +957,7 @@ def semantic_brief_for_stage( return brief.model_copy( update={ "must_phrases": must[:12], + "exclude_phrases": exclude[:12], "development_arc": arc[:8], "semantic_strength": min(1.0, strength), } @@ -672,33 +973,24 @@ def exercise_passes_stage_learning_goal_gate( semantic_score: float = 0.0, min_semantic: float = 0.20, relaxed: bool = False, + anti_patterns: Optional[Sequence[str]] = None, + stage_brief: Optional[PlanningSemanticBrief] = None, + stage_semantic_score: Optional[float] = None, + step_phase: Optional[str] = None, ) -> bool: - """Roadmap-Stufe: Übung muss zum Stufen-Lernziel passen, nicht nur zum Gesamtthema.""" - lg = (learning_goal or "").strip() - if len(lg) < 3: - return True - - blob = _blob_from_fields(title, summary, goal, []) - norm_lg = _normalize_phrase(lg) - if _phrase_in_blob(norm_lg, blob): - return True - - tokens = _significant_stage_tokens(lg) - if not tokens: - threshold = 0.12 if relaxed else min_semantic - return semantic_score >= threshold - - hits = sum(1 for t in tokens if _phrase_in_blob(t, blob)) - if len(tokens) <= 2: - required = 1 - else: - required = max(2, (len(tokens) + 1) // 2) - - if hits >= required: - return True - - threshold = 0.14 if relaxed else min_semantic - return semantic_score >= threshold + """Roadmap-Stufe: delegiert an exercise_passes_stage_fit (Titel + Summary + Goal).""" + del semantic_score, min_semantic + return exercise_passes_stage_fit( + learning_goal=learning_goal, + title=title, + summary=summary, + goal=goal, + stage_brief=stage_brief, + stage_semantic_score=stage_semantic_score, + anti_patterns=anti_patterns, + step_phase=step_phase, + relaxed=relaxed, + ) def exercise_passes_path_semantic_gate( @@ -739,7 +1031,9 @@ def pick_best_path_hit( *, semantic_brief: Optional[PlanningSemanticBrief] = None, stage_learning_goal: Optional[str] = None, + stage_anti_patterns: Optional[Sequence[str]] = None, roadmap_stage_match: bool = False, + stage_match_brief: Optional[PlanningSemanticBrief] = None, ) -> Optional[Dict[str, Any]]: """Gestufte Auswahl: strikt → relaxed → optional Notfall-Fallback.""" if not hits: @@ -747,6 +1041,13 @@ def pick_best_path_hit( stage_goal = (stage_learning_goal or "").strip() + stage_brief: Optional[PlanningSemanticBrief] = stage_match_brief + if roadmap_stage_match and stage_goal and stage_brief is None: + stage_brief = build_stage_match_brief( + learning_goal=stage_goal, + anti_patterns=stage_anti_patterns, + ) + def _scan(*, strict: bool) -> Optional[Dict[str, Any]]: best: Optional[Dict[str, Any]] = None best_key: Tuple[float, float] = (-1.0, -1.0) @@ -754,28 +1055,38 @@ def pick_best_path_hit( eid = int(hit["id"]) if eid in used_exercise_ids: continue - sem = float(hit.get("semantic_score") or 0.0) title = str(hit.get("title") or "") summary = str(hit.get("summary") or "") - if semantic_brief and not exercise_passes_path_semantic_gate( - semantic_score=sem, - title=title, - summary=summary, - goal="", - brief=semantic_brief, - strict=strict, - ): - continue - if stage_goal and not exercise_passes_stage_learning_goal_gate( - learning_goal=stage_goal, - title=title, - summary=summary, - semantic_score=sem, - relaxed=not strict, - ): - continue + goal_text = str(hit.get("goal") or hit.get("exercise_goal") or "") + sem = float(hit.get("semantic_score") or 0.0) + stage_sem = float(hit.get("stage_semantic_score") or sem) + + if roadmap_stage_match and stage_goal: + if not exercise_passes_stage_fit( + learning_goal=stage_goal, + title=title, + summary=summary, + goal=goal_text, + stage_brief=stage_brief, + stage_semantic_score=stage_sem, + anti_patterns=stage_anti_patterns, + relaxed=not strict, + ): + continue + else: + if semantic_brief and not exercise_passes_path_semantic_gate( + semantic_score=sem, + title=title, + summary=summary, + goal=goal_text, + brief=semantic_brief, + strict=strict, + ): + continue + score = float(hit.get("score") or 0.0) - key = (sem, score) + rank_sem = stage_sem if roadmap_stage_match and stage_goal else sem + key = (rank_sem, score) if key > best_key: best_key = key best = hit @@ -820,9 +1131,15 @@ __all__ = [ "build_semantic_brief", "enrich_target_with_semantic_expectations", "exercise_passes_path_semantic_gate", + "StageGoalConstraints", + "apply_stage_match_retrieval_weights", + "build_stage_match_brief", + "exercise_passes_stage_fit", "exercise_passes_stage_learning_goal_gate", "merge_semantic_brief_llm", + "parse_stage_goal_constraints", "pick_best_path_hit", + "score_exercise_stage_fit", "semantic_brief_for_stage", "resolve_semantic_skill_weights", "score_exercise_semantic_relevance", diff --git a/backend/tests/test_planning_roadmap_stage_match.py b/backend/tests/test_planning_roadmap_stage_match.py index 6fc529a..ce8a572 100644 --- a/backend/tests/test_planning_roadmap_stage_match.py +++ b/backend/tests/test_planning_roadmap_stage_match.py @@ -1,7 +1,9 @@ """Tests Roadmap-Stufen-Match — Gate gegen themenfremde Übungen.""" from planning_exercise_semantics import ( + build_stage_match_brief, exercise_passes_stage_learning_goal_gate, pick_best_path_hit, + score_exercise_stage_fit, semantic_brief_for_stage, build_semantic_brief, ) @@ -35,10 +37,37 @@ def test_semantic_brief_for_stage_adds_learning_goal(): assert "hüftmobilität und kammerhaltung" in stage.must_phrases[0] +def test_build_stage_match_brief_uses_stage_tokens_not_global_topic(): + brief = build_stage_match_brief( + learning_goal="Koordination von Absprung und Beinhebung ohne Tritttechnik", + phase="vertiefung", + ) + must_blob = " ".join(brief.must_phrases or []).lower() + assert "mawashi" not in must_blob + assert "absprung" in must_blob + assert not (brief.primary_topic or "").strip() + + +def test_stage_fit_prefers_goal_over_misleading_title(): + stage_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik" + stage_brief = build_stage_match_brief(learning_goal=stage_goal) + kick_score, _ = score_exercise_stage_fit( + title="Mawashi Geri Trittpräzision", + summary="Kicktechnik", + goal="Präzision im Tritt und Hüftarbeit", + stage_brief=stage_brief, + ) + coord_score, _ = score_exercise_stage_fit( + title="Allgemeines Sprungtraining", + summary="Athletik", + goal="Absprung, Beinhebung und Landung koordinieren — ohne Trittausführung", + stage_brief=stage_brief, + ) + assert coord_score > kick_score + + def test_pick_best_path_hit_roadmap_stage_no_weak_fallback(): - brief = build_semantic_brief("Mae Geri Perfektion") - stage_brief = semantic_brief_for_stage( - brief, + stage_brief = build_stage_match_brief( learning_goal="Hüftmobilität für Mae Geri", phase="grundlage", ) @@ -69,9 +98,7 @@ def test_pick_best_path_hit_roadmap_stage_no_weak_fallback(): def test_pick_best_path_hit_roadmap_stage_picks_relevant(): - brief = build_semantic_brief("Mae Geri Perfektion") - stage_brief = semantic_brief_for_stage( - brief, + stage_brief = build_stage_match_brief( learning_goal="Hüftmobilität für Mae Geri", phase="grundlage", ) @@ -94,3 +121,63 @@ def test_pick_best_path_hit_roadmap_stage_picks_relevant(): ) assert chosen is not None assert int(chosen["id"]) == 2 + + +def test_stage_gate_rejects_tritt_when_goal_says_ohne_tritttechnik(): + """Regression: gesprungener Mawashi — Slot Koordination ohne Tritttechnik.""" + goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik" + assert not exercise_passes_stage_learning_goal_gate( + learning_goal=goal, + title="Verbesserung der Trittpräzision des Mawashi Geri und der Hüftbewegung", + summary="Präzision und Hüftarbeit im Stand", + semantic_score=0.72, + ) + + +def test_stage_gate_accepts_absprung_drill_not_kick_focus(): + goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik" + assert exercise_passes_stage_learning_goal_gate( + learning_goal=goal, + title="Sprungkoordination — Absprung und Beinhebung", + summary="Ohne Trittausführung, Fokus Gleichgewicht und Timing", + semantic_score=0.35, + ) + + +def test_pick_best_rejects_mawashi_tritt_precision_for_coordination_slot(): + stage_goal = "Koordination von Absprung und Beinhebung ohne Tritttechnik" + stage_brief = build_stage_match_brief(learning_goal=stage_goal, phase="vertiefung") + hits = [ + { + "id": 99, + "title": "Verbesserung der Trittpräzision des Mawashi Geri und der Hüftbewegung", + "summary": "Tritttechnik und Hüfte im Stand", + "score": 0.91, + "semantic_score": 0.68, + }, + { + "id": 100, + "title": "Absprung und Beinhebung — Koordination ohne Kick", + "summary": "Sprungvorbereitung, kein Tritt", + "score": 0.62, + "semantic_score": 0.41, + }, + ] + chosen = pick_best_path_hit( + hits, + set(), + semantic_brief=stage_brief, + stage_learning_goal=stage_goal, + roadmap_stage_match=True, + ) + assert chosen is not None + assert int(chosen["id"]) == 100 + + +def test_parse_stage_goal_constraints_extracts_ohne_tritttechnik(): + from planning_exercise_semantics import parse_stage_goal_constraints + + c = parse_stage_goal_constraints("Koordination von Absprung und Beinhebung ohne Tritttechnik") + assert c.has_negation + assert "absprung" in c.positive_tokens + assert any("tritt" in ex for ex in c.exclude_phrases) diff --git a/backend/version.py b/backend/version.py index 00dacd4..9469e76 100644 --- a/backend/version.py +++ b/backend/version.py @@ -1,6 +1,6 @@ # Shinkan Jinkendo Version Information -APP_VERSION = "0.8.218" +APP_VERSION = "0.8.220" BUILD_DATE = "2026-06-07" DB_SCHEMA_VERSION = "20260607088" @@ -53,6 +53,22 @@ MODULE_VERSIONS = { } CHANGELOG = [ + { + "version": "0.8.220", + "date": "2026-06-07", + "changes": [ + "Roadmap-Stufen-Match: build_stage_match_brief + stage_semantic_score über Titel, Summary und Goal.", + "Retriever lädt Übungsziele immer bei Stufen-Match; Ranking nach Stufen-Fit statt Gesamtthema.", + ], + }, + { + "version": "0.8.219", + "date": "2026-06-07", + "changes": [ + "Roadmap-Stufen-Gate: Negationen (ohne Tritttechnik) + Pflicht-Treffer Absprung/Beinhebung.", + "anti_patterns in Stufen-Match; Gesamt-Thema allein reicht bei strict_positive nicht mehr.", + ], + }, { "version": "0.8.218", "date": "2026-06-07",