Enhance Planning Exercise Suggestion and Ranking Logic
All checks were successful
Deploy Development / deploy (push) Successful in 46s
Test Suite / pytest-backend (push) Successful in 40s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 14s
Test Suite / k6 /health Baseline (push) Successful in 33s
Test Suite / playwright-tests (push) Successful in 1m16s
All checks were successful
Deploy Development / deploy (push) Successful in 46s
Test Suite / pytest-backend (push) Successful in 40s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 14s
Test Suite / k6 /health Baseline (push) Successful in 33s
Test Suite / playwright-tests (push) Successful in 1m16s
- Introduced a new function `hybrid_ranking_ambiguous` that decides when to rerank candidates based on score proximity, improving the decision-making process for exercise suggestions.
- Updated `should_run_llm_rank_pipeline` to use the new ranking check and to handle ambiguous rankings more effectively.
- Adjusted the frontend to always request LLM ranking, ensuring consistent behavior across different query lengths.
- Incremented the version to 0.8.182 and updated the changelog to reflect these enhancements in planning AI capabilities.
This commit is contained in:
parent
a0a891e550
commit
50aff849d8
|
|
@ -662,6 +662,7 @@ def suggest_planning_exercises(
|
|||
include_llm_rank=body.include_llm_rank,
|
||||
query_intent_applied=query_intent_applied,
|
||||
llm_expectation_applied=llm_expectation_applied,
|
||||
has_planning_reference=has_plan_ref,
|
||||
hits=hits,
|
||||
)
|
||||
if run_llm_rank:
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ Ablauf:
|
|||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any, Dict, List, Mapping, Optional, Tuple
|
||||
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple
|
||||
|
||||
from planning_exercise_expectation import try_build_planning_expectation_from_context
|
||||
from planning_exercise_intent import (
|
||||
|
|
@ -135,6 +135,31 @@ def deterministic_rank_confident(hits: Sequence[Mapping[str, Any]], *, gap_thres
|
|||
return (top - fourth) >= gap_threshold
|
||||
|
||||
|
||||
def hybrid_ranking_ambiguous(
    hits: Sequence[Mapping[str, Any]],
    *,
    top_four_gap: float = 0.08,
    top_ten_gap: float = 0.055,
) -> bool:
    """Return True when the leading scores lie too close together, so a rerank pays off.

    Args:
        hits: Candidates in ranking order; each mapping may carry a numeric
            ``"score"``. A missing, ``None`` or zero score counts as ``0.0``.
        top_four_gap: Minimum lead of the first hit over the fourth hit (or
            over the last hit when fewer than ten hits exist) for the ranking
            to count as clear.
        top_ten_gap: Minimum lead of the first hit over the tenth hit for the
            ranking to count as clear.

    Returns:
        ``True`` if any checked gap is smaller than its threshold, ``False``
        otherwise. Fewer than three hits always yield ``False``.
    """
    hit_count = len(hits)
    if hit_count < 3:
        return False

    def _score_at(index: int) -> float:
        # ``or 0.0`` maps a missing key, None and 0 onto the same neutral value.
        return float(hits[index].get("score") or 0.0)

    top = _score_at(0)

    # A narrow lead over the fourth hit marks the top field as ambiguous.
    if hit_count >= 4 and (top - _score_at(3)) < top_four_gap:
        return True

    # With ten or more hits, the lead over the tenth hit is checked as well.
    if hit_count >= 10:
        return (top - _score_at(9)) < top_ten_gap

    # Three to nine hits: compare against the last available hit. The guard
    # above guarantees at least three hits, so no further length check or
    # index clamping is needed here.
    return (top - _score_at(hit_count - 1)) < top_four_gap
|
||||
|
||||
|
||||
def should_run_llm_rank_pipeline(
|
||||
query: Optional[str],
|
||||
scenario: str,
|
||||
|
|
@ -142,26 +167,38 @@ def should_run_llm_rank_pipeline(
|
|||
include_llm_rank: bool,
|
||||
query_intent_applied: bool,
|
||||
llm_expectation_applied: bool = False,
|
||||
has_planning_reference: bool = True,
|
||||
hits: Sequence[Mapping[str, Any]],
|
||||
) -> bool:
|
||||
"""
|
||||
Maximal ein LLM-Call pro Request: wenn Intent- oder Erwartungs-LLM lief, kein Rerank.
|
||||
Rerank nur bei längerer, komplexer Anfrage und unklarem Hybrid-Ranking.
|
||||
Phase B2: Rerank bei unklarem Hybrid-Ranking — auch nach Erwartungs-/Intent-LLM.
|
||||
|
||||
Budget: max. 2 LLM-Calls pro Suche (Profil-LLM + optional Rerank).
|
||||
"""
|
||||
if not include_llm_rank:
|
||||
return False
|
||||
if query_intent_applied or llm_expectation_applied:
|
||||
if len(hits) < 3:
|
||||
return False
|
||||
if scenario == SCENARIO_PRESET_NEXT:
|
||||
if not hybrid_ranking_ambiguous(hits):
|
||||
return False
|
||||
|
||||
q = _normalize_query(query)
|
||||
if not q:
|
||||
return False
|
||||
profile_llm = query_intent_applied or llm_expectation_applied
|
||||
|
||||
if scenario == SCENARIO_PRESET_NEXT:
|
||||
return has_planning_reference
|
||||
|
||||
if scenario == SCENARIO_FREE_SEARCH:
|
||||
if len(q) < 10 and not profile_llm:
|
||||
return False
|
||||
return True
|
||||
|
||||
if scenario == SCENARIO_ADDITIVE:
|
||||
return len(q) >= 12 and not deterministic_rank_confident(hits)
|
||||
if len(q) < 22:
|
||||
return False
|
||||
return not deterministic_rank_confident(hits)
|
||||
return len(q) >= 8 or profile_llm
|
||||
|
||||
if profile_llm:
|
||||
return True
|
||||
return len(q) >= 14
|
||||
|
||||
|
||||
def _recalculate_skill_gap(target: PlanningTargetProfile) -> PlanningTargetProfile:
|
||||
|
|
@ -420,4 +457,5 @@ __all__ = [
|
|||
"should_run_llm_intent_pipeline",
|
||||
"should_run_llm_rank_pipeline",
|
||||
"deterministic_rank_confident",
|
||||
"hybrid_ranking_ambiguous",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -56,11 +56,11 @@ def test_should_skip_llm_intent_short_free_search():
|
|||
)
|
||||
|
||||
|
||||
def test_should_skip_llm_rank_when_intent_already_applied():
|
||||
def test_should_run_llm_rank_when_intent_applied_and_ambiguous():
|
||||
from planning_exercise_target_pipeline import SCENARIO_ADDITIVE, should_run_llm_rank_pipeline
|
||||
|
||||
hits = [{"score": 0.5}, {"score": 0.48}, {"score": 0.47}, {"score": 0.46}]
|
||||
assert not should_run_llm_rank_pipeline(
|
||||
assert should_run_llm_rank_pipeline(
|
||||
"Baut auf dem Plan auf und trainiert zusätzlich Schnellkraft mit Partner",
|
||||
SCENARIO_ADDITIVE,
|
||||
include_llm_rank=True,
|
||||
|
|
@ -69,6 +69,36 @@ def test_should_skip_llm_rank_when_intent_already_applied():
|
|||
)
|
||||
|
||||
|
||||
def test_should_skip_llm_rank_when_ranking_confident():
|
||||
from planning_exercise_target_pipeline import SCENARIO_PRESET_NEXT, should_run_llm_rank_pipeline
|
||||
|
||||
hits = [{"score": 0.9}, {"score": 0.5}, {"score": 0.4}, {"score": 0.3}]
|
||||
assert not should_run_llm_rank_pipeline(
|
||||
"",
|
||||
SCENARIO_PRESET_NEXT,
|
||||
include_llm_rank=True,
|
||||
query_intent_applied=False,
|
||||
llm_expectation_applied=True,
|
||||
has_planning_reference=True,
|
||||
hits=hits,
|
||||
)
|
||||
|
||||
|
||||
def test_should_run_llm_rank_for_preset_when_ambiguous():
|
||||
from planning_exercise_target_pipeline import SCENARIO_PRESET_NEXT, should_run_llm_rank_pipeline
|
||||
|
||||
hits = [{"score": 0.42}, {"score": 0.41}, {"score": 0.4}, {"score": 0.39}]
|
||||
assert should_run_llm_rank_pipeline(
|
||||
"",
|
||||
SCENARIO_PRESET_NEXT,
|
||||
include_llm_rank=True,
|
||||
query_intent_applied=False,
|
||||
llm_expectation_applied=True,
|
||||
has_planning_reference=True,
|
||||
hits=hits,
|
||||
)
|
||||
|
||||
|
||||
def test_compose_retrieval_phase():
|
||||
assert compose_retrieval_phase(query_intent=False, llm_rank=False) == "profile_v1"
|
||||
assert compose_retrieval_phase(query_intent=True, llm_rank=True) == "profile_v1+query_intent+llm_rank"
|
||||
|
|
@ -99,10 +129,10 @@ def test_should_run_llm_expectation_for_preset_with_planning_ref():
|
|||
)
|
||||
|
||||
|
||||
def test_should_skip_llm_rank_when_expectation_applied():
|
||||
def test_should_skip_llm_rank_when_expectation_applied_but_confident():
|
||||
from planning_exercise_target_pipeline import SCENARIO_PRESET_NEXT, should_run_llm_rank_pipeline
|
||||
|
||||
hits = [{"score": 0.5}, {"score": 0.48}, {"score": 0.47}, {"score": 0.46}]
|
||||
hits = [{"score": 0.85}, {"score": 0.4}, {"score": 0.35}, {"score": 0.3}]
|
||||
assert not should_run_llm_rank_pipeline(
|
||||
"",
|
||||
SCENARIO_PRESET_NEXT,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# Shinkan Jinkendo Version Information
|
||||
|
||||
APP_VERSION = "0.8.181"
|
||||
APP_VERSION = "0.8.182"
|
||||
BUILD_DATE = "2026-05-23"
|
||||
DB_SCHEMA_VERSION = "20260531074"
|
||||
|
||||
|
|
@ -29,7 +29,7 @@ MODULE_VERSIONS = {
|
|||
"skill_profiles": "1.0.0", # Phase 3: gewichtetes Fähigkeiten-Profil + skill-discovery/suggestions
|
||||
"methods": "0.1.0",
|
||||
"exercises": "2.37.0", # Planungs-KI P1: Szenario-Pipeline + Query-Intent-Overlay
|
||||
"planning_exercise_suggest": "0.9.0", # Phase B: Text-Signale guidance/Rahmen-Ziele; requires_partner-Filter
|
||||
"planning_exercise_suggest": "0.10.0", # Phase B2: Rerank bei engem Top-Feld, auch nach Profil-LLM
|
||||
"training_units": "0.4.0", # POST .../publish-to-framework: Ablauf aus geplanter Einheit → Rahmen-Slot-Blueprint
|
||||
"training_programs": "0.1.0",
|
||||
"planning": "0.15.0", # Vorlagen: Strukturvorschau, Bearbeiten inkl. Split-Sessions + Beschreibung
|
||||
|
|
@ -44,6 +44,14 @@ MODULE_VERSIONS = {
|
|||
}
|
||||
|
||||
CHANGELOG = [
|
||||
{
|
||||
"version": "0.8.182",
|
||||
"date": "2026-05-23",
|
||||
"changes": [
|
||||
"Planungs-KI Phase B2: LLM-Rerank bei engem Top-Feld — auch nach Erwartungs-/Intent-LLM (max. 2 Calls).",
|
||||
"Preset „Nächste aus Kontext“: Rerank wenn Ranking unklar; Frontend sendet include_llm_rank immer.",
|
||||
],
|
||||
},
|
||||
{
|
||||
"version": "0.8.181",
|
||||
"date": "2026-05-23",
|
||||
|
|
|
|||
|
|
@ -31,7 +31,6 @@ const PAGE_SIZE = 100
|
|||
const PLANNING_SUGGEST_LIMIT = 50
|
||||
/** Client-Hinweis — Backend entscheidet final über LLM-Gates (max. 2 Calls: Profil-LLM + optionaler Rerank). */
|
||||
const PLANNING_LLM_INTENT_MIN_CHARS = 10
|
||||
const PLANNING_LLM_RANK_MIN_CHARS = 24
|
||||
const LEVEL_FILTER_OPTS = SKILL_LEVEL_OPTIONS.filter((o) => o.level != null)
|
||||
|
||||
const INITIAL_FILTERS = { ...INITIAL_EXERCISE_LIST_FILTERS }
|
||||
|
|
@ -460,7 +459,7 @@ export default function ExercisePickerModal({
|
|||
: undefined,
|
||||
include_llm_intent:
|
||||
query.length >= PLANNING_LLM_INTENT_MIN_CHARS || !(query || '').trim(),
|
||||
include_llm_rank: query.length >= PLANNING_LLM_RANK_MIN_CHARS,
|
||||
include_llm_rank: true,
|
||||
query,
|
||||
intent_hint:
|
||||
activePlanningContext.intentHint || (useFreePlanningSearch && query ? 'free_search' : null),
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user