All checks were successful
Deploy Development / deploy (push) Successful in 46s
Test Suite / pytest-backend (push) Successful in 44s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 14s
Test Suite / k6 /health Baseline (push) Successful in 33s
Test Suite / playwright-tests (push) Successful in 1m26s
- Introduced new functions `_off_topic_semantic_scores_by_slot` and `_score_exercise_stage_fit_for_spec` to improve the evaluation of off-topic steps and exercise stage fit, enhancing the quality assessment process.
- Updated `_run_unified_slot_improvement_review` to incorporate off-topic scores and exercise stage fit scoring, refining the decision-making process for slot suggestions.
- Enhanced existing logic to streamline the handling of slot scores and improve the overall robustness of slot management in path evaluations.
114 lines
3.4 KiB
Python
114 lines
3.4 KiB
Python
"""Schachstellen-Erkennung für unified Slot-Review."""
|
|
from planning_exercise_path_builder import (
|
|
_parse_slot_refs_from_text,
|
|
_problematic_slots_from_path_qa,
|
|
_slot_auto_select_library,
|
|
_slot_suggestion_accepted,
|
|
)
|
|
from planning_progression_roadmap import StageSpecArtifact
|
|
|
|
|
|
def _spec(midx: int) -> StageSpecArtifact:
    """Build a minimal ``StageSpecArtifact`` fixture for major step ``midx``.

    Only the step index and a 1-based learning-goal label are populated;
    every other field is passed as an empty list or empty string.
    """
    goal_label = f"Lernziel Slot {midx + 1}"
    spec_fields = {
        "major_step_index": midx,
        "learning_goal": goal_label,
        "load_profile": [],
        "exercise_type": "",
        "success_criteria": [],
        "anti_patterns": [],
    }
    return StageSpecArtifact(**spec_fields)
|
|
|
|
|
|
def test_problematic_slots_from_optimization_hints():
    """A ``rematch_slot`` hint with ``step_index`` 1 must flag slot 1."""
    rematch_hint = {
        "action": "rematch_slot",
        "step_index": 1,
        "issue": "stage_mismatch",
        "reason": "Übung passt nicht zur Stufe",
    }
    path_qa = {"optimization_hints": [rematch_hint], "off_topic_steps": []}
    path_steps = [
        {"roadmap_major_step_index": position, "exercise_id": position + 1, "title": title}
        for position, title in enumerate(("A", "B"))
    ]
    stage_specs = [_spec(0), _spec(1)]

    flagged = _problematic_slots_from_path_qa(path_qa, path_steps, stage_specs)

    assert 1 in flagged
    # At least one reason recorded for slot 1 has to mention "Stufe" or "passt".
    assert any(("Stufe" in reason) or ("passt" in reason) for reason in flagged[1])
|
|
|
|
|
|
def test_slot_suggestion_accepted_for_problem_slot():
    """A suggestion for a flagged slot is accepted although the score is unchanged.

    The baseline QA carries a ``rematch_slot`` hint for major step 1, the
    projected QA carries none, and both scores are 0.7.
    """
    exercise_swap = {"baseline_exercise_id": 10, "proposed_exercise_id": 99}
    qa_before = {
        "optimization_hints": [
            {"action": "rematch_slot", "roadmap_major_step_index": 1},
        ],
    }
    qa_after = {"optimization_hints": []}

    accepted = _slot_suggestion_accepted(
        baseline_qa=qa_before,
        projected_qa=qa_after,
        baseline_score=0.7,
        projected_score=0.7,
        diff=exercise_swap,
        off_topic=False,
        major_idx=1,
        slot_problem=True,
    )

    assert accepted
|
|
|
|
|
|
def test_parse_slot_refs_schritt_is_one_based():
    """1-based "Schritt"/"slot"/"Stufe" numbers in text map to 0-based indices."""
    expected_by_text = {
        "Schritt 8 (Ukemi Vorwärts) entfernen": {7},
        "slot 3 und Stufe 5": {2, 4},
    }
    for text, expected_refs in expected_by_text.items():
        assert _parse_slot_refs_from_text(text) == expected_refs
|
|
|
|
|
|
def test_problematic_slots_from_refine_stage_spec_hint():
    """A ``refine_stage_spec`` hint with ``step_index`` 7 must flag slot 7."""
    slot_count = 8
    refine_hint = {
        "action": "refine_stage_spec",
        "step_index": 7,
        "issue": "stage_mismatch",
        "reason": "Stufen-Fit zu schwach (0.00) für „Integration von Täuschung“",
    }
    path_qa = {"optimization_hints": [refine_hint], "off_topic_steps": []}

    path_steps = []
    for slot in range(slot_count):
        path_steps.append(
            {
                "roadmap_major_step_index": slot,
                "exercise_id": slot + 1,
                "title": f"Übung {slot + 1}",
            }
        )
    # The last of the eight steps gets a distinct title.
    path_steps[-1]["title"] = "Ukemi Vorwärts"

    stage_specs = list(map(_spec, range(slot_count)))

    flagged = _problematic_slots_from_path_qa(path_qa, path_steps, stage_specs)

    assert 7 in flagged
|
|
|
|
|
|
def test_problematic_slots_from_llm_schritt_text():
    """A free-text issue naming "Schritt 8" must flag slot 7.

    No optimization hints or off-topic steps are supplied; the only input
    pointing at the slot is the issue sentence.
    """
    issue_text = "Schritt 8 (Ukemi Vorwärts) hat keinen Bezug zur Kumite-Beinarbeit"
    path_qa = {
        "optimization_hints": [],
        "off_topic_steps": [],
        "issues": [issue_text],
    }
    only_step = {
        "roadmap_major_step_index": 7,
        "exercise_id": 99,
        "title": "Ukemi Vorwärts",
    }

    flagged = _problematic_slots_from_path_qa(path_qa, [only_step], [_spec(7)])

    assert 7 in flagged
|
|
|
|
|
|
def test_slot_auto_select_requires_higher_score():
    """Auto-selection needs a strictly higher proposed score; a tie is rejected."""
    exercise_ids = {"baseline_exercise_id": 1, "proposed_exercise_id": 2}

    # Proposed score 0.51 against baseline 0.5: expected to be selected.
    improved = _slot_auto_select_library(
        baseline_slot_score=0.5,
        proposed_slot_score=0.51,
        **exercise_ids,
    )
    assert improved

    # Equal scores: expected not to be selected.
    tied = _slot_auto_select_library(
        baseline_slot_score=0.5,
        proposed_slot_score=0.5,
        **exercise_ids,
    )
    assert not tied
|