All checks were successful
Deploy Development / deploy (push) Successful in 46s
Test Suite / pytest-backend (push) Successful in 40s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 14s
Test Suite / k6 /health Baseline (push) Successful in 33s
Test Suite / playwright-tests (push) Successful in 1m16s
- Introduced a new function `hybrid_ranking_ambiguous` to determine when to rerank candidates based on score proximity, improving the decision-making process for exercise suggestions.
- Updated `should_run_llm_rank_pipeline` to incorporate the new ranking logic and handle scenarios with ambiguous rankings more effectively.
- Adjusted the frontend to always include LLM ranking in requests, ensuring consistent behavior across different query lengths.
- Incremented version to 0.8.182 and updated changelog to reflect these enhancements in planning AI capabilities.
201 lines
7.3 KiB
Python
201 lines
7.3 KiB
Python
"""Tests Planungs-Übungssuche: Intent, Szenario-Pipeline, LLM-Parser."""
|
|
from planning_exercise_suggest import resolve_planning_exercise_intent
|
|
from planning_exercise_intent import parse_planning_query_intent_response
|
|
from planning_exercise_llm_rank import parse_planning_exercise_rank_response
|
|
from planning_exercise_target_pipeline import (
|
|
SCENARIO_ADDITIVE,
|
|
SCENARIO_PRESET_NEXT,
|
|
classify_planning_scenario,
|
|
compose_retrieval_phase,
|
|
is_simple_preset_query,
|
|
should_run_llm_intent_pipeline,
|
|
)
|
|
|
|
|
|
def test_resolve_planning_exercise_intent_defaults():
    """An empty or blank query resolves to the ``suggest_next`` intent."""
    cases = (
        ("", None),
        (" ", "suggest_next"),
    )
    for query, intent_arg in cases:
        assert resolve_planning_exercise_intent(query, intent_arg) == "suggest_next"
|
|
|
|
|
|
def test_resolve_planning_exercise_intent_keywords():
    """Keyword-bearing queries resolve to the matching intent when ``None`` is passed."""
    expected_by_query = {
        "Vertiefung Partner": "deepen_exercise",
        "nächste übung": "suggest_next",
        "progression graph": "progression_next",
    }
    for query, expected_intent in expected_by_query.items():
        assert resolve_planning_exercise_intent(query, None) == expected_intent
|
|
|
|
|
|
def test_classify_planning_scenario_preset():
    """Preset-style "next exercise" queries are recognised and classified as such."""
    # Phrases that must be detected as simple preset queries.
    for phrase in ("Schlage mir die nächste Übung vor", "nächste Übung planen"):
        assert is_simple_preset_query(phrase)

    # With the ``suggest_next`` intent, these queries all map to the preset scenario.
    for query in ("", "nächste übung", "nächste Übung planen"):
        scenario = classify_planning_scenario(query, "suggest_next")
        assert scenario == SCENARIO_PRESET_NEXT
|
|
|
|
|
|
def test_classify_planning_scenario_additive():
    """An additive-constraint query is classified as additive and triggers LLM intent."""
    query = "Baut auf der Planung auf und trainiert zusätzlich Schnellkraft"

    scenario = classify_planning_scenario(query, "continue_plan_goal")
    assert scenario == SCENARIO_ADDITIVE

    run_llm_intent = should_run_llm_intent_pipeline(
        query,
        SCENARIO_ADDITIVE,
        include_llm_intent=True,
    )
    assert run_llm_intent
|
|
|
|
|
|
def test_should_skip_llm_for_preset():
    """The LLM intent pipeline is skipped for the preset scenario even when requested."""
    for query in ("", "nächste übung"):
        run_llm_intent = should_run_llm_intent_pipeline(
            query,
            SCENARIO_PRESET_NEXT,
            include_llm_intent=True,
        )
        assert not run_llm_intent
|
|
|
|
|
|
def test_should_skip_llm_intent_short_free_search():
    """A one-word free-search query does not trigger the LLM intent pipeline."""
    from planning_exercise_target_pipeline import (
        SCENARIO_FREE_SEARCH,
        should_run_llm_intent_pipeline,
    )

    run_llm_intent = should_run_llm_intent_pipeline(
        "Partnerübung",
        SCENARIO_FREE_SEARCH,
        include_llm_intent=True,
    )
    assert not run_llm_intent
|
|
|
|
|
|
def test_should_run_llm_rank_when_intent_applied_and_ambiguous():
    """LLM rerank runs for an additive query whose hit scores lie close together."""
    from planning_exercise_target_pipeline import (
        SCENARIO_ADDITIVE,
        should_run_llm_rank_pipeline,
    )

    # Scores within 0.04 of each other: no clear winner among the hits.
    hits = [{"score": value} for value in (0.5, 0.48, 0.47, 0.46)]
    query = "Baut auf dem Plan auf und trainiert zusätzlich Schnellkraft mit Partner"

    run_llm_rank = should_run_llm_rank_pipeline(
        query,
        SCENARIO_ADDITIVE,
        include_llm_rank=True,
        query_intent_applied=True,
        hits=hits,
    )
    assert run_llm_rank
|
|
|
|
|
|
def test_should_skip_llm_rank_when_ranking_confident():
    """LLM rerank is skipped for a preset query when the top hit clearly leads."""
    from planning_exercise_target_pipeline import (
        SCENARIO_PRESET_NEXT,
        should_run_llm_rank_pipeline,
    )

    # The first score (0.9) is far ahead of the runner-up (0.5).
    hits = [{"score": value} for value in (0.9, 0.5, 0.4, 0.3)]

    run_llm_rank = should_run_llm_rank_pipeline(
        "",
        SCENARIO_PRESET_NEXT,
        include_llm_rank=True,
        query_intent_applied=False,
        llm_expectation_applied=True,
        has_planning_reference=True,
        hits=hits,
    )
    assert not run_llm_rank
|
|
|
|
|
|
def test_should_run_llm_rank_for_preset_when_ambiguous():
    """LLM rerank runs even for a preset query when the hit scores are nearly tied."""
    from planning_exercise_target_pipeline import (
        SCENARIO_PRESET_NEXT,
        should_run_llm_rank_pipeline,
    )

    # Scores spaced 0.01 apart: the ranking has no clear leader.
    hits = [{"score": value} for value in (0.42, 0.41, 0.4, 0.39)]

    run_llm_rank = should_run_llm_rank_pipeline(
        "",
        SCENARIO_PRESET_NEXT,
        include_llm_rank=True,
        query_intent_applied=False,
        llm_expectation_applied=True,
        has_planning_reference=True,
        hits=hits,
    )
    assert run_llm_rank
|
|
|
|
|
|
def test_compose_retrieval_phase():
    """The retrieval-phase label is ``profile_v1`` plus one ``+suffix`` per enabled stage."""
    baseline = compose_retrieval_phase(query_intent=False, llm_rank=False)
    assert baseline == "profile_v1"

    intent_and_rank = compose_retrieval_phase(query_intent=True, llm_rank=True)
    assert intent_and_rank == "profile_v1+query_intent+llm_rank"

    library_and_intent = compose_retrieval_phase(
        full_library=True,
        query_intent=True,
        llm_rank=False,
    )
    assert library_and_intent == "profile_v1+full_library+query_intent"
|
|
|
|
|
|
def test_should_run_llm_expectation_for_preset_with_planning_ref():
    """LLM expectation runs only for the preset scenario with LLM intent enabled.

    Every case passes ``has_planning_reference=True``; only the scenario and
    the ``include_llm_intent`` flag vary.
    """
    from planning_exercise_target_pipeline import should_run_llm_expectation_pipeline

    # (scenario, include_llm_intent, expected decision)
    cases = (
        (SCENARIO_PRESET_NEXT, True, True),
        (SCENARIO_PRESET_NEXT, False, False),
        (SCENARIO_ADDITIVE, True, False),
    )
    for scenario, llm_intent_enabled, should_run in cases:
        decision = should_run_llm_expectation_pipeline(
            scenario,
            include_llm_intent=llm_intent_enabled,
            has_planning_reference=True,
        )
        if should_run:
            assert decision
        else:
            assert not decision
|
|
|
|
|
|
def test_should_skip_llm_rank_when_expectation_applied_but_confident():
    """An applied LLM expectation alone does not force a rerank when the top hit leads."""
    from planning_exercise_target_pipeline import (
        SCENARIO_PRESET_NEXT,
        should_run_llm_rank_pipeline,
    )

    # The first score (0.85) is far ahead of the runner-up (0.4).
    hits = [{"score": value} for value in (0.85, 0.4, 0.35, 0.3)]

    run_llm_rank = should_run_llm_rank_pipeline(
        "",
        SCENARIO_PRESET_NEXT,
        include_llm_rank=True,
        query_intent_applied=False,
        llm_expectation_applied=True,
        hits=hits,
    )
    assert not run_llm_rank
|
|
|
|
|
|
def test_compose_retrieval_phase_llm_expectation():
    """The ``llm_expectation`` flag appends ``+llm_expectation`` to the phase label."""
    expectation_only = compose_retrieval_phase(llm_expectation=True)
    assert expectation_only == "profile_v1+llm_expectation"

    with_full_library = compose_retrieval_phase(full_library=True, llm_expectation=True)
    assert with_full_library == "profile_v1+full_library+llm_expectation"
|
|
|
|
|
|
def test_query_only_expectation_without_planning_reference():
    """Without a planning reference the target is built from the query alone.

    Calls ``build_planning_target_with_query_pipeline`` with an empty unit, no
    planned exercises, no anchor and ``has_planning_reference=False``, then
    checks that the free-search intent and the ``query_only`` expectation mode
    are reported and that the target lists ``query_only`` among its sources.
    """
    from planning_exercise_target_pipeline import build_planning_target_with_query_pipeline

    class _Cur:
        # Attribute-less stand-in passed as the first positional argument
        # (presumably a DB cursor — confirm against the pipeline signature).
        pass

    target, intent, scenario, summary = build_planning_target_with_query_pipeline(
        _Cur(),
        unit={"id": None, "framework_slot_id": None, "origin_framework_slot_id": None},
        planned_exercise_ids=[],
        section_planned_exercise_ids=[],
        anchor_exercise_id=None,
        query="Partnerübung Reaktion",
        heuristic_intent="free_search",
        include_llm_intent=False,
        context_summary={"expectation_mode": "query_only"},
        has_planning_reference=False,
    )

    assert intent == "free_search"
    assert summary.get("expectation_mode") == "query_only"
    # The membership check already covers the exact-match case
    # ``target.sources == ["query_only"]``, so a single assertion suffices.
    assert "query_only" in target.sources
|
|
|
|
|
|
def test_parse_planning_query_intent_response():
    """A well-formed JSON reply is parsed into intent, scenario and skill hints."""
    raw_reply = (
        '{"intent":"continue_plan_goal","scenario":"additive_constraint",'
        '"skill_hints":[{"name":"Schnellkraft","weight":1}],"emphasis":"additive",'
        '"rationale":"Zusatz Schnellkraft"}'
    )

    result = parse_planning_query_intent_response(raw_reply)

    assert result.intent == "continue_plan_goal"
    assert result.scenario == "additive_constraint"
    first_hint = result.skill_hints[0]
    assert first_hint.name == "Schnellkraft"
|
|
|
|
|
|
def test_parse_planning_exercise_rank_response_filters_ids():
    """Unknown and repeated ids are dropped from the ranking and its reasons."""
    # The reply ranks id 20 twice and mentions id 999, which is not allowed.
    raw_reply = '{"ranked_ids":[20,999,20,10],"reasons":{"20":"Passt gut","999":"ignore"}}'
    permitted_ids = {10, 20, 30}

    ranking, reason_by_id = parse_planning_exercise_rank_response(raw_reply, permitted_ids)

    assert ranking == [20, 10]
    assert reason_by_id[20] == "Passt gut"
    assert 999 not in reason_by_id
|