shinkan-jinkendo/backend/tests/test_planning_exercise_suggest.py
Lars 50aff849d8
All checks were successful
Deploy Development / deploy (push) Successful in 46s
Test Suite / pytest-backend (push) Successful in 40s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 14s
Test Suite / k6 /health Baseline (push) Successful in 33s
Test Suite / playwright-tests (push) Successful in 1m16s
Enhance Planning Exercise Suggestion and Ranking Logic
- Introduced a new function `hybrid_ranking_ambiguous` to determine when to rerank candidates based on score proximity, improving the decision-making process for exercise suggestions.
- Updated `should_run_llm_rank_pipeline` to incorporate the new ranking logic and handle scenarios with ambiguous rankings more effectively.
- Adjusted the frontend to always include LLM ranking in requests, ensuring consistent behavior across different query lengths.
- Incremented version to 0.8.182 and updated changelog to reflect these enhancements in planning AI capabilities.
2026-05-23 10:28:03 +02:00

201 lines
7.3 KiB
Python

"""Tests Planungs-Übungssuche: Intent, Szenario-Pipeline, LLM-Parser."""
from planning_exercise_suggest import resolve_planning_exercise_intent
from planning_exercise_intent import parse_planning_query_intent_response
from planning_exercise_llm_rank import parse_planning_exercise_rank_response
from planning_exercise_target_pipeline import (
SCENARIO_ADDITIVE,
SCENARIO_PRESET_NEXT,
classify_planning_scenario,
compose_retrieval_phase,
is_simple_preset_query,
should_run_llm_intent_pipeline,
)
def test_resolve_planning_exercise_intent_defaults():
    """Blank queries resolve to ``suggest_next``, with or without that intent passed in."""
    blank_cases = (
        ("", None),
        (" ", "suggest_next"),
    )
    for query, given in blank_cases:
        resolved = resolve_planning_exercise_intent(query, given)
        assert resolved == "suggest_next"
def test_resolve_planning_exercise_intent_keywords():
    """Queries containing intent keywords resolve to the matching intent when ``None`` is given."""
    # Insertion order of the dict keeps the checks in their original sequence.
    expected_by_query = {
        "Vertiefung Partner": "deepen_exercise",
        "nächste übung": "suggest_next",
        "progression graph": "progression_next",
    }
    for query, expected in expected_by_query.items():
        assert resolve_planning_exercise_intent(query, None) == expected
def test_classify_planning_scenario_preset():
    """Check that "next exercise" phrasings are simple presets classified as the preset scenario.

    The empty query is also expected to classify as ``SCENARIO_PRESET_NEXT``.
    """
    for phrase in ("Schlage mir die nächste Übung vor", "nächste Übung planen"):
        assert is_simple_preset_query(phrase)

    for query in ("", "nächste übung", "nächste Übung planen"):
        scenario = classify_planning_scenario(query, "suggest_next")
        assert scenario == SCENARIO_PRESET_NEXT
def test_classify_planning_scenario_additive():
    """An "build on the plan and additionally train X" query is additive and enables LLM intent."""
    query = "Baut auf der Planung auf und trainiert zusätzlich Schnellkraft"

    scenario = classify_planning_scenario(query, "continue_plan_goal")
    assert scenario == SCENARIO_ADDITIVE

    run_intent = should_run_llm_intent_pipeline(
        query,
        SCENARIO_ADDITIVE,
        include_llm_intent=True,
    )
    assert run_intent
def test_should_skip_llm_for_preset():
    """The LLM intent pipeline stays off for preset-next queries even when it is requested."""
    for query in ("", "nächste übung"):
        run_intent = should_run_llm_intent_pipeline(
            query,
            SCENARIO_PRESET_NEXT,
            include_llm_intent=True,
        )
        assert not run_intent
def test_should_skip_llm_intent_short_free_search():
    """A one-word free-search query does not trigger the LLM intent pipeline."""
    from planning_exercise_target_pipeline import (
        SCENARIO_FREE_SEARCH,
        should_run_llm_intent_pipeline,
    )

    run_intent = should_run_llm_intent_pipeline(
        "Partnerübung",
        SCENARIO_FREE_SEARCH,
        include_llm_intent=True,
    )
    assert not run_intent
def test_should_run_llm_rank_when_intent_applied_and_ambiguous():
    """LLM re-ranking runs for an additive query when the top hit scores lie close together."""
    from planning_exercise_target_pipeline import (
        SCENARIO_ADDITIVE,
        should_run_llm_rank_pipeline,
    )

    # Tightly clustered scores: no hit clearly leads the ranking.
    hits = [{"score": value} for value in (0.5, 0.48, 0.47, 0.46)]

    should_rerank = should_run_llm_rank_pipeline(
        "Baut auf dem Plan auf und trainiert zusätzlich Schnellkraft mit Partner",
        SCENARIO_ADDITIVE,
        include_llm_rank=True,
        query_intent_applied=True,
        hits=hits,
    )
    assert should_rerank
def test_should_skip_llm_rank_when_ranking_confident():
    """LLM re-ranking is skipped for a preset query when the top hit clearly leads."""
    from planning_exercise_target_pipeline import (
        SCENARIO_PRESET_NEXT,
        should_run_llm_rank_pipeline,
    )

    # The first score is far ahead of the rest.
    hits = [{"score": value} for value in (0.9, 0.5, 0.4, 0.3)]

    should_rerank = should_run_llm_rank_pipeline(
        "",
        SCENARIO_PRESET_NEXT,
        include_llm_rank=True,
        query_intent_applied=False,
        llm_expectation_applied=True,
        has_planning_reference=True,
        hits=hits,
    )
    assert not should_rerank
def test_should_run_llm_rank_for_preset_when_ambiguous():
    """LLM re-ranking runs for a preset query when the hit scores are nearly tied."""
    from planning_exercise_target_pipeline import (
        SCENARIO_PRESET_NEXT,
        should_run_llm_rank_pipeline,
    )

    # Scores differ by only 0.01 from one hit to the next.
    hits = [{"score": value} for value in (0.42, 0.41, 0.4, 0.39)]

    should_rerank = should_run_llm_rank_pipeline(
        "",
        SCENARIO_PRESET_NEXT,
        include_llm_rank=True,
        query_intent_applied=False,
        llm_expectation_applied=True,
        has_planning_reference=True,
        hits=hits,
    )
    assert should_rerank
def test_compose_retrieval_phase():
    """The phase label starts at ``profile_v1`` and gains a ``+suffix`` per enabled stage."""
    baseline = compose_retrieval_phase(query_intent=False, llm_rank=False)
    assert baseline == "profile_v1"

    intent_and_rank = compose_retrieval_phase(query_intent=True, llm_rank=True)
    assert intent_and_rank == "profile_v1+query_intent+llm_rank"

    library_and_intent = compose_retrieval_phase(
        full_library=True,
        query_intent=True,
        llm_rank=False,
    )
    assert library_and_intent == "profile_v1+full_library+query_intent"
def test_should_run_llm_expectation_for_preset_with_planning_ref():
    """With a planning reference, LLM expectation runs only for preset-next with LLM intent on."""
    from planning_exercise_target_pipeline import should_run_llm_expectation_pipeline

    # (scenario, include_llm_intent, expected decision)
    cases = (
        (SCENARIO_PRESET_NEXT, True, True),
        (SCENARIO_PRESET_NEXT, False, False),
        (SCENARIO_ADDITIVE, True, False),
    )
    for scenario, llm_intent_enabled, should_run in cases:
        decision = should_run_llm_expectation_pipeline(
            scenario,
            include_llm_intent=llm_intent_enabled,
            has_planning_reference=True,
        )
        # Compare truthiness, matching the original bare assert / assert-not checks.
        assert bool(decision) is should_run
def test_should_skip_llm_rank_when_expectation_applied_but_confident():
    """An applied LLM expectation alone does not force re-ranking when the top hit clearly leads."""
    from planning_exercise_target_pipeline import (
        SCENARIO_PRESET_NEXT,
        should_run_llm_rank_pipeline,
    )

    # The first score is far ahead of the rest.
    hits = [{"score": value} for value in (0.85, 0.4, 0.35, 0.3)]

    should_rerank = should_run_llm_rank_pipeline(
        "",
        SCENARIO_PRESET_NEXT,
        include_llm_rank=True,
        query_intent_applied=False,
        llm_expectation_applied=True,
        hits=hits,
    )
    assert not should_rerank
def test_compose_retrieval_phase_llm_expectation():
    """The ``llm_expectation`` stage is appended to the phase label, after ``full_library``."""
    expectation_only = compose_retrieval_phase(llm_expectation=True)
    assert expectation_only == "profile_v1+llm_expectation"

    with_full_library = compose_retrieval_phase(full_library=True, llm_expectation=True)
    assert with_full_library == "profile_v1+full_library+llm_expectation"
def test_query_only_expectation_without_planning_reference():
    """Without a planning reference the pipeline keeps the query-only expectation mode.

    The heuristic intent and the ``expectation_mode`` summary entry must come back
    unchanged, and the resulting target must list ``query_only`` among its sources.
    """
    from planning_exercise_profiles import PlanningTargetProfile
    from planning_exercise_target_pipeline import build_planning_target_with_query_pipeline

    class _Cur:
        # Attribute-less placeholder passed as the first positional argument.
        pass

    cursor = _Cur()
    empty_unit = {
        "id": None,
        "framework_slot_id": None,
        "origin_framework_slot_id": None,
    }

    target, intent, _scenario, summary = build_planning_target_with_query_pipeline(
        cursor,
        unit=empty_unit,
        planned_exercise_ids=[],
        section_planned_exercise_ids=[],
        anchor_exercise_id=None,
        query="Partnerübung Reaktion",
        heuristic_intent="free_search",
        include_llm_intent=False,
        context_summary={"expectation_mode": "query_only"},
        has_planning_reference=False,
    )

    assert intent == "free_search"
    assert summary.get("expectation_mode") == "query_only"
    assert target.sources == ["query_only"] or "query_only" in target.sources
def test_parse_planning_query_intent_response():
    """A well-formed JSON intent response exposes intent, scenario and skill hints."""
    payload = (
        '{"intent":"continue_plan_goal",'
        '"scenario":"additive_constraint",'
        '"skill_hints":[{"name":"Schnellkraft","weight":1}],'
        '"emphasis":"additive",'
        '"rationale":"Zusatz Schnellkraft"}'
    )

    parsed = parse_planning_query_intent_response(payload)

    assert parsed.intent == "continue_plan_goal"
    assert parsed.scenario == "additive_constraint"
    first_hint = parsed.skill_hints[0]
    assert first_hint.name == "Schnellkraft"
def test_parse_planning_exercise_rank_response_filters_ids():
    """Disallowed and duplicate IDs are dropped; reasons are keyed by integer ID."""
    allowed_ids = {10, 20, 30}
    raw_response = '{"ranked_ids":[20,999,20,10],"reasons":{"20":"Passt gut","999":"ignore"}}'

    ranked_ids, reasons_by_id = parse_planning_exercise_rank_response(raw_response, allowed_ids)

    # 999 is not allowed and the second 20 is a repeat, so only [20, 10] remain.
    assert ranked_ids == [20, 10]
    assert reasons_by_id[20] == "Passt gut"
    assert 999 not in reasons_by_id