From 50aff849d89bc007fb209e3d0bcfdcffd138986d Mon Sep 17 00:00:00 2001 From: Lars Date: Sat, 23 May 2026 10:28:03 +0200 Subject: [PATCH] Enhance Planning Exercise Suggestion and Ranking Logic - Introduced a new function `hybrid_ranking_ambiguous` to determine when to rerank candidates based on score proximity, improving the decision-making process for exercise suggestions. - Updated `should_run_llm_rank_pipeline` to incorporate the new ranking logic and handle scenarios with ambiguous rankings more effectively. - Adjusted the frontend to always include LLM ranking in requests, ensuring consistent behavior across different query lengths. - Incremented version to 0.8.182 and updated changelog to reflect these enhancements in planning AI capabilities. --- backend/planning_exercise_suggest.py | 1 + backend/planning_exercise_target_pipeline.py | 60 +++++++++++++++---- .../tests/test_planning_exercise_suggest.py | 38 ++++++++++-- backend/version.py | 12 +++- .../src/components/ExercisePickerModal.jsx | 3 +- 5 files changed, 95 insertions(+), 19 deletions(-) diff --git a/backend/planning_exercise_suggest.py b/backend/planning_exercise_suggest.py index dc5f148..4085b22 100644 --- a/backend/planning_exercise_suggest.py +++ b/backend/planning_exercise_suggest.py @@ -662,6 +662,7 @@ def suggest_planning_exercises( include_llm_rank=body.include_llm_rank, query_intent_applied=query_intent_applied, llm_expectation_applied=llm_expectation_applied, + has_planning_reference=has_plan_ref, hits=hits, ) if run_llm_rank: diff --git a/backend/planning_exercise_target_pipeline.py b/backend/planning_exercise_target_pipeline.py index ac4b403..3846c34 100644 --- a/backend/planning_exercise_target_pipeline.py +++ b/backend/planning_exercise_target_pipeline.py @@ -10,7 +10,7 @@ Ablauf: from __future__ import annotations import re -from typing import Any, Dict, List, Mapping, Optional, Tuple +from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple from planning_exercise_expectation import try_build_planning_expectation_from_context from planning_exercise_intent import ( @@ -135,6 +135,31 @@ def deterministic_rank_confident(hits: Sequence[Mapping[str, Any]], *, gap_thres return (top - fourth) >= gap_threshold +def hybrid_ranking_ambiguous( + hits: Sequence[Mapping[str, Any]], + *, + top_four_gap: float = 0.08, + top_ten_gap: float = 0.055, +) -> bool: + """True wenn Top-Kandidaten scores zu nah beieinander liegen — Rerank lohnt sich.""" + if len(hits) < 3: + return False + top = float(hits[0].get("score") or 0.0) + if len(hits) >= 4: + fourth = float(hits[3].get("score") or 0.0) + if (top - fourth) < top_four_gap: + return True + if len(hits) >= 10: + tenth = float(hits[9].get("score") or 0.0) + if (top - tenth) < top_ten_gap: + return True + elif len(hits) >= 2: + tail = float(hits[min(len(hits) - 1, 9)].get("score") or 0.0) + if (top - tail) < top_four_gap: + return True + return False + + def should_run_llm_rank_pipeline( query: Optional[str], scenario: str, @@ -142,26 +167,38 @@ def should_run_llm_rank_pipeline( include_llm_rank: bool, query_intent_applied: bool, llm_expectation_applied: bool = False, + has_planning_reference: bool = True, hits: Sequence[Mapping[str, Any]], ) -> bool: """ - Maximal ein LLM-Call pro Request: wenn Intent- oder Erwartungs-LLM lief, kein Rerank. - Rerank nur bei längerer, komplexer Anfrage und unklarem Hybrid-Ranking. + Phase B2: Rerank bei unklarem Hybrid-Ranking — auch nach Erwartungs-/Intent-LLM. + + Budget: max. 2 LLM-Calls pro Suche (Profil-LLM + optional Rerank). """ if not include_llm_rank: return False - if query_intent_applied or llm_expectation_applied: + if len(hits) < 3: return False - if scenario == SCENARIO_PRESET_NEXT: + if not hybrid_ranking_ambiguous(hits): return False + q = _normalize_query(query) - if not q: - return False + profile_llm = query_intent_applied or llm_expectation_applied + + if scenario == SCENARIO_PRESET_NEXT: + return has_planning_reference + + if scenario == SCENARIO_FREE_SEARCH: + if len(q) < 10 and not profile_llm: + return False + return True + if scenario == SCENARIO_ADDITIVE: - return len(q) >= 12 and not deterministic_rank_confident(hits) - if len(q) < 22: - return False - return not deterministic_rank_confident(hits) + return len(q) >= 8 or profile_llm + + if profile_llm: + return True + return len(q) >= 14 def _recalculate_skill_gap(target: PlanningTargetProfile) -> PlanningTargetProfile: @@ -420,4 +457,5 @@ __all__ = [ "should_run_llm_intent_pipeline", "should_run_llm_rank_pipeline", "deterministic_rank_confident", + "hybrid_ranking_ambiguous", ] diff --git a/backend/tests/test_planning_exercise_suggest.py b/backend/tests/test_planning_exercise_suggest.py index 4f5a951..34d026e 100644 --- a/backend/tests/test_planning_exercise_suggest.py +++ b/backend/tests/test_planning_exercise_suggest.py @@ -56,11 +56,11 @@ def test_should_skip_llm_intent_short_free_search(): ) -def test_should_skip_llm_rank_when_intent_already_applied(): +def test_should_run_llm_rank_when_intent_applied_and_ambiguous(): from planning_exercise_target_pipeline import SCENARIO_ADDITIVE, should_run_llm_rank_pipeline hits = [{"score": 0.5}, {"score": 0.48}, {"score": 0.47}, {"score": 0.46}] - assert not should_run_llm_rank_pipeline( + assert should_run_llm_rank_pipeline( "Baut auf dem Plan auf und trainiert zusätzlich Schnellkraft mit Partner", SCENARIO_ADDITIVE, include_llm_rank=True, @@ -69,6 +69,36 @@ def test_should_skip_llm_rank_when_intent_already_applied(): ) +def test_should_skip_llm_rank_when_ranking_confident(): + from planning_exercise_target_pipeline import SCENARIO_PRESET_NEXT, should_run_llm_rank_pipeline + + hits = [{"score": 0.9}, {"score": 0.5}, {"score": 0.4}, {"score": 0.3}] + assert not should_run_llm_rank_pipeline( + "", + SCENARIO_PRESET_NEXT, + include_llm_rank=True, + query_intent_applied=False, + llm_expectation_applied=True, + has_planning_reference=True, + hits=hits, + ) + + +def test_should_run_llm_rank_for_preset_when_ambiguous(): + from planning_exercise_target_pipeline import SCENARIO_PRESET_NEXT, should_run_llm_rank_pipeline + + hits = [{"score": 0.42}, {"score": 0.41}, {"score": 0.4}, {"score": 0.39}] + assert should_run_llm_rank_pipeline( + "", + SCENARIO_PRESET_NEXT, + include_llm_rank=True, + query_intent_applied=False, + llm_expectation_applied=True, + has_planning_reference=True, + hits=hits, + ) + + def test_compose_retrieval_phase(): assert compose_retrieval_phase(query_intent=False, llm_rank=False) == "profile_v1" assert compose_retrieval_phase(query_intent=True, llm_rank=True) == "profile_v1+query_intent+llm_rank" @@ -99,10 +129,10 @@ def test_should_run_llm_expectation_for_preset_with_planning_ref(): ) -def test_should_skip_llm_rank_when_expectation_applied(): +def test_should_skip_llm_rank_when_expectation_applied_but_confident(): from planning_exercise_target_pipeline import SCENARIO_PRESET_NEXT, should_run_llm_rank_pipeline - hits = [{"score": 0.5}, {"score": 0.48}, {"score": 0.47}, {"score": 0.46}] + hits = [{"score": 0.85}, {"score": 0.4}, {"score": 0.35}, {"score": 0.3}] assert not should_run_llm_rank_pipeline( "", SCENARIO_PRESET_NEXT, diff --git a/backend/version.py b/backend/version.py index 62b9181..ae4b50f 100644 --- a/backend/version.py +++ b/backend/version.py @@ -1,6 +1,6 @@ # Shinkan Jinkendo Version Information -APP_VERSION = "0.8.181" +APP_VERSION = "0.8.182" BUILD_DATE = "2026-05-23" DB_SCHEMA_VERSION = "20260531074" @@ -29,7 +29,7 @@ MODULE_VERSIONS = { "skill_profiles": "1.0.0", # Phase 3: gewichtetes Fähigkeiten-Profil + skill-discovery/suggestions "methods": "0.1.0", "exercises": "2.37.0", # Planungs-KI P1: Szenario-Pipeline + Query-Intent-Overlay - "planning_exercise_suggest": "0.9.0", # Phase B: Text-Signale guidance/Rahmen-Ziele; requires_partner-Filter + "planning_exercise_suggest": "0.10.0", # Phase B2: Rerank bei engem Top-Feld, auch nach Profil-LLM "training_units": "0.4.0", # POST .../publish-to-framework: Ablauf aus geplanter Einheit → Rahmen-Slot-Blueprint "training_programs": "0.1.0", "planning": "0.15.0", # Vorlagen: Strukturvorschau, Bearbeiten inkl. Split-Sessions + Beschreibung @@ -44,6 +44,14 @@ MODULE_VERSIONS = { } CHANGELOG = [ + { + "version": "0.8.182", + "date": "2026-05-23", + "changes": [ + "Planungs-KI Phase B2: LLM-Rerank bei engem Top-Feld — auch nach Erwartungs-/Intent-LLM (max. 2 Calls).", + "Preset „Nächste aus Kontext“: Rerank wenn Ranking unklar; Frontend sendet include_llm_rank immer.", + ], + }, { "version": "0.8.181", "date": "2026-05-23", diff --git a/frontend/src/components/ExercisePickerModal.jsx b/frontend/src/components/ExercisePickerModal.jsx index 6269d0a..ad53453 100644 --- a/frontend/src/components/ExercisePickerModal.jsx +++ b/frontend/src/components/ExercisePickerModal.jsx @@ -31,7 +31,6 @@ const PAGE_SIZE = 100 const PLANNING_SUGGEST_LIMIT = 50 /** Client-Hinweis — Backend entscheidet final über LLM-Gates (max. 1 Call). */ const PLANNING_LLM_INTENT_MIN_CHARS = 10 -const PLANNING_LLM_RANK_MIN_CHARS = 24 const LEVEL_FILTER_OPTS = SKILL_LEVEL_OPTIONS.filter((o) => o.level != null) const INITIAL_FILTERS = { ...INITIAL_EXERCISE_LIST_FILTERS } @@ -460,7 +459,7 @@ export default function ExercisePickerModal({ : undefined, include_llm_intent: query.length >= PLANNING_LLM_INTENT_MIN_CHARS || !(query || '').trim(), - include_llm_rank: query.length >= PLANNING_LLM_RANK_MIN_CHARS, + include_llm_rank: true, query, intent_hint: activePlanningContext.intentHint || (useFreePlanningSearch && query ? 'free_search' : null),