Implement Learning Goal Candidate Retrieval and Roadmap Fallback Logic
All checks were successful
Deploy Development / deploy (push) Successful in 43s
Test Suite / pytest-backend (push) Successful in 43s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 13s
Test Suite / k6 /health Baseline (push) Successful in 34s
Test Suite / playwright-tests (push) Successful in 1m22s

- Added `_safe_tsquery_fragment` to sanitize learning goal input for SQL queries, improving query safety.
- Introduced `_fetch_learning_goal_library_candidate_ids` to retrieve exercise IDs matching learning goals, enhancing exercise relevance in roadmap suggestions.
- Enhanced `_match_roadmap_slot` to utilize learning goal candidates, improving the accuracy of supplemental exercise selection.
- Implemented `_pick_roadmap_rank_fallback` to provide a fallback mechanism for selecting the best exercise when strict matching fails, ensuring better exercise retrieval.
- Updated tests to validate the new learning goal retrieval and fallback logic, ensuring robustness in exercise selection processes.
This commit is contained in:
Lars 2026-06-11 12:54:07 +02:00
parent b2fbf6b4af
commit 6d130a7e09
5 changed files with 227 additions and 5 deletions

View File

@ -347,6 +347,87 @@ def _graph_visibility_context(
) )
def _safe_tsquery_fragment(text: str) -> str:
import re
cleaned = re.sub(r"[^\w\säöüßÄÖÜ]", " ", text or "", flags=re.UNICODE)
words = [w for w in cleaned.split() if len(w) >= 2][:10]
return " ".join(words) if words else (text or "")[:60].strip()
def _fetch_learning_goal_library_candidate_ids(
cur,
*,
tenant: TenantContext,
progression_graph_id: Optional[int],
learning_goal: str,
limit: int = 24,
) -> List[int]:
"""Sichtbare Übungen, deren Titel/Volltext zum Stufen-Lernziel passt."""
lg = (learning_goal or "").strip()
if len(lg) < 3:
return []
vis_sql, vis_params = _planning_visibility_sql(cur, tenant, progression_graph_id)
tsq = _safe_tsquery_fragment(lg)
like_pat = f"%{lg[:100].lower()}%"
try:
cur.execute(
f"""
SELECT e.id
FROM exercises e
WHERE ({vis_sql})
AND COALESCE(e.status, '') <> %s
AND (
lower(trim(e.title)) = lower(trim(%s))
OR lower(e.title) LIKE %s
OR (%s <> '' AND e.search_vector @@ plainto_tsquery('german', %s))
)
ORDER BY
CASE WHEN lower(trim(e.title)) = lower(trim(%s)) THEN 0 ELSE 1 END,
CASE WHEN %s <> '' THEN ts_rank_cd(e.search_vector, plainto_tsquery('german', %s)) ELSE 0 END DESC,
e.id ASC
LIMIT %s
""",
[
*vis_params,
"archived",
lg,
like_pat,
tsq,
tsq,
lg,
tsq,
tsq,
int(limit),
],
)
except Exception:
cur.execute(
f"""
SELECT e.id
FROM exercises e
WHERE ({vis_sql})
AND COALESCE(e.status, '') <> %s
AND (
lower(trim(e.title)) = lower(trim(%s))
OR lower(e.title) LIKE %s
)
ORDER BY CASE WHEN lower(trim(e.title)) = lower(trim(%s)) THEN 0 ELSE 1 END, e.id ASC
LIMIT %s
""",
[*vis_params, "archived", lg, like_pat, lg, int(limit)],
)
out: List[int] = []
for row in cur.fetchall() or []:
try:
eid = int(row.get("id") or 0)
except (TypeError, ValueError):
continue
if eid > 0:
out.append(eid)
return out
def _load_supplemental_exercise_rows( def _load_supplemental_exercise_rows(
cur, cur,
*, *,
@ -1000,10 +1081,31 @@ def _match_roadmap_slot(
step_kind = resolve_step_exercise_kind_filter(stage_spec, body.exercise_kind_any) step_kind = resolve_step_exercise_kind_filter(stage_spec, body.exercise_kind_any)
supplemental_ids = _supplemental_exercise_ids_from_body(cur, body) supplemental_ids = _supplemental_exercise_ids_from_body(cur, body)
lg_candidates = _fetch_learning_goal_library_candidate_ids(
cur,
tenant=tenant,
progression_graph_id=body.progression_graph_id,
learning_goal=stage_goal,
)
supplemental_ids = list(
dict.fromkeys(
int(x)
for x in [
*supplemental_ids,
*lg_candidates,
slot_priority_exercise_id,
]
if x is not None and int(x) > 0
)
)
priority_ids = list( priority_ids = list(
dict.fromkeys( dict.fromkeys(
x int(x)
for x in [slot_priority_exercise_id, *(body.retrieval_boost_exercise_ids or [])] for x in [
slot_priority_exercise_id,
*(body.retrieval_boost_exercise_ids or []),
*lg_candidates[:8],
]
if x is not None and int(x) > 0 if x is not None and int(x) > 0
) )
) )

View File

@ -955,6 +955,7 @@ def enrich_brief_with_path_constraints(
_MIN_STAGE_FIT_SEMANTIC = 0.30 _MIN_STAGE_FIT_SEMANTIC = 0.30
_MIN_STAGE_FIT_RELAXED = 0.20 _MIN_STAGE_FIT_RELAXED = 0.20
_MIN_TITLE_EQUIV_SEMANTIC = 0.15 _MIN_TITLE_EQUIV_SEMANTIC = 0.15
_MIN_ROADMAP_FALLBACK_RANK = 0.15
def build_stage_match_brief( def build_stage_match_brief(
@ -1260,6 +1261,76 @@ def exercise_passes_path_semantic_gate(
return False return False
def _pick_roadmap_rank_fallback(
hits: List[Dict[str, Any]],
used_exercise_ids: Set[int],
*,
stage_learning_goal: str,
stage_anti_patterns: Optional[Sequence[str]] = None,
path_primary_topic: Optional[str] = None,
path_technique_excludes: Optional[Sequence[str]] = None,
) -> Optional[Dict[str, Any]]:
"""
Roadmap-Notfall: bester Treffer nach Stufen-Ranking, wenn striktes Gate leer läuft.
Filtert weiterhin Ausschlüsse und Technik-Scope (Kumite etc.), aber ohne
Mindest-Semantik-Schwelle so finden auch wortnahe Bibliotheks-Übungen den Slot.
"""
stage_goal = (stage_learning_goal or "").strip()
if not stage_goal or not hits:
return None
best: Optional[Dict[str, Any]] = None
best_key: Tuple[float, float] = (-1.0, -1.0)
for hit in hits:
try:
eid = int(hit["id"])
except (TypeError, ValueError, KeyError):
continue
if eid in used_exercise_ids:
continue
title = str(hit.get("title") or "")
summary = str(hit.get("summary") or "")
goal_text = str(hit.get("goal") or hit.get("exercise_goal") or "")
blob = _blob_from_fields(title, summary, goal_text, [])
constraints = parse_stage_goal_constraints(stage_goal, stage_anti_patterns)
if constraints.exclude_phrases and _blob_matches_stage_excludes(
blob, constraints.exclude_phrases
):
continue
title_equiv = exercise_title_equivalent_to_stage_goal(title, stage_goal)
primary = (path_primary_topic or "").strip()
if primary and not title_equiv:
tech_excludes = list(path_technique_excludes or [])
for item in technique_sibling_excludes(primary):
if item not in tech_excludes:
tech_excludes.append(item)
if not exercise_passes_technique_path_scope(
primary_topic=primary,
title=title,
summary=summary,
goal=goal_text,
learning_goal=stage_goal,
sibling_excludes=tech_excludes,
relaxed=True,
):
continue
rank_sem = float(
hit.get("stage_rank_semantic")
or hit.get("stage_semantic_score")
or hit.get("semantic_score")
or 0.0
)
score = float(hit.get("score") or 0.0)
key = (rank_sem, score)
if key > best_key:
best_key = key
best = hit
if best is None or best_key[0] < _MIN_ROADMAP_FALLBACK_RANK:
return None
return best
def pick_best_path_hit( def pick_best_path_hit(
hits: List[Dict[str, Any]], hits: List[Dict[str, Any]],
used_exercise_ids: Set[int], used_exercise_ids: Set[int],
@ -1341,7 +1412,16 @@ def pick_best_path_hit(
if roadmap_stage_match: if roadmap_stage_match:
chosen = _scan(strict=False) chosen = _scan(strict=False)
if chosen:
return chosen return chosen
return _pick_roadmap_rank_fallback(
hits,
used_exercise_ids,
stage_learning_goal=stage_goal,
stage_anti_patterns=stage_anti_patterns,
path_primary_topic=path_primary_topic,
path_technique_excludes=path_technique_excludes,
)
chosen = _scan(strict=False) chosen = _scan(strict=False)
if chosen: if chosen:

View File

@ -856,7 +856,7 @@ def build_roadmap_unfilled_gap_specs(
"roadmap_major_step_index": stage_spec.major_step_index, "roadmap_major_step_index": stage_spec.major_step_index,
} }
) )
return specs[:5] return specs[:12]
def build_stage_specs( def build_stage_specs(

View File

@ -367,6 +367,39 @@ def test_stage_fit_passes_for_title_equivalent_with_sufficient_semantic_score():
) )
def test_roadmap_rank_fallback_picks_best_stage_semantic():
from planning_exercise_semantics import _pick_roadmap_rank_fallback
stage_goal = "Hüftmobilität für Mawashi Geri"
hits = [
{
"id": 1,
"title": "Hüftmobilität für Mawashi Geri",
"summary": "Aufwärmen",
"goal": "",
"score": 0.9,
"stage_rank_semantic": 0.32,
},
{
"id": 2,
"title": "Mawashi Hüftdehnung",
"summary": "Adduktoren und Hüfte",
"goal": "Mobilität für Mawashi Geri",
"score": 0.7,
"stage_rank_semantic": 0.58,
},
]
chosen = _pick_roadmap_rank_fallback(
hits,
set(),
stage_learning_goal=stage_goal,
path_primary_topic="mawashi geri",
path_technique_excludes=technique_sibling_excludes("mawashi geri"),
)
assert chosen is not None
assert int(chosen["id"]) == 2
def test_pick_best_prefers_semantic_fit_over_coincidental_title(): def test_pick_best_prefers_semantic_fit_over_coincidental_title():
stage_goal = "Hüftmobilität für Mawashi Geri" stage_goal = "Hüftmobilität für Mawashi Geri"
stage_brief = build_stage_match_brief(learning_goal=stage_goal) stage_brief = build_stage_match_brief(learning_goal=stage_goal)

View File

@ -727,7 +727,7 @@ export function slotsToSlotAssignments(draft) {
})) }))
} }
/** Alle Graph-Übungs-IDs für Retriever-Boost (Slots + Geschwister). */ /** Alle Graph-Übungs-IDs für Retriever-Boost (Slots + Geschwister + gespeichertes Artefakt). */
export function draftRetrievalBoostExerciseIds(draft) { export function draftRetrievalBoostExerciseIds(draft) {
const ids = new Set() const ids = new Set()
for (const slot of draft.slots || []) { for (const slot of draft.slots || []) {
@ -737,6 +737,13 @@ export function draftRetrievalBoostExerciseIds(draft) {
if (sib.kind === 'library' && sib.exerciseId != null) ids.add(sib.exerciseId) if (sib.kind === 'library' && sib.exerciseId != null) ids.add(sib.exerciseId)
} }
} }
const saved = draft?.slot_contents || draft?.planningArtifact?.slot_contents
if (Array.isArray(saved)) {
for (const raw of saved) {
const eid = raw?.primary?.exercise_id ?? raw?.exercise_id
if (eid != null && Number.isFinite(Number(eid))) ids.add(Number(eid))
}
}
return [...ids] return [...ids]
} }