Enhance Planning Exercise Suggestion and Ranking Logic

- Introduced a new function `hybrid_ranking_ambiguous` to determine when to rerank candidates based on score proximity, improving the decision-making process for exercise suggestions.
- Updated `should_run_llm_rank_pipeline` to incorporate the new ranking logic and handle scenarios with ambiguous rankings more effectively.
- Adjusted the frontend to always include LLM ranking in requests, ensuring consistent behavior across different query lengths.
- Incremented version to 0.8.182 and updated changelog to reflect these enhancements in planning AI capabilities.
2026-05-23 10:28:03 +02:00 · 2026-05-23 10:28:03 +02:00 · 50aff849d8
commit 50aff849d8
parent a0a891e550
5 changed files with 95 additions and 19 deletions
--- a/backend/planning_exercise_suggest.py
+++ b/backend/planning_exercise_suggest.py
@ -662,6 +662,7 @@ def suggest_planning_exercises(
        include_llm_rank=body.include_llm_rank,
        query_intent_applied=query_intent_applied,
        llm_expectation_applied=llm_expectation_applied,
+        has_planning_reference=has_plan_ref,
        hits=hits,
    )
    if run_llm_rank:
--- a/backend/planning_exercise_target_pipeline.py
+++ b/backend/planning_exercise_target_pipeline.py
@ -10,7 +10,7 @@ Ablauf:
 from __future__ import annotations

 import re
-from typing import Any, Dict, List, Mapping, Optional, Tuple
+from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple

 from planning_exercise_expectation import try_build_planning_expectation_from_context
 from planning_exercise_intent import (
@ -135,6 +135,31 @@ def deterministic_rank_confident(hits: Sequence[Mapping[str, Any]], *, gap_thres
    return (top - fourth) >= gap_threshold


+def hybrid_ranking_ambiguous(
+    hits: Sequence[Mapping[str, Any]],
+    *,
+    top_four_gap: float = 0.08,
+    top_ten_gap: float = 0.055,
+) -> bool:
+    """True wenn Top-Kandidaten scores zu nah beieinander liegen — Rerank lohnt sich."""
+    if len(hits) < 3:
+        return False
+    top = float(hits[0].get("score") or 0.0)
+    if len(hits) >= 4:
+        fourth = float(hits[3].get("score") or 0.0)
+        if (top - fourth) < top_four_gap:
+            return True
+    if len(hits) >= 10:
+        tenth = float(hits[9].get("score") or 0.0)
+        if (top - tenth) < top_ten_gap:
+            return True
+    elif len(hits) >= 2:
+        tail = float(hits[min(len(hits) - 1, 9)].get("score") or 0.0)
+        if (top - tail) < top_four_gap:
+            return True
+    return False
+
+
 def should_run_llm_rank_pipeline(
    query: Optional[str],
    scenario: str,
@ -142,26 +167,38 @@ def should_run_llm_rank_pipeline(
    include_llm_rank: bool,
    query_intent_applied: bool,
    llm_expectation_applied: bool = False,
+    has_planning_reference: bool = True,
    hits: Sequence[Mapping[str, Any]],
 ) -> bool:
    """
-    Maximal ein LLM-Call pro Request: wenn Intent- oder Erwartungs-LLM lief, kein Rerank.
-    Rerank nur bei längerer, komplexer Anfrage und unklarem Hybrid-Ranking.
+    Phase B2: Rerank bei unklarem Hybrid-Ranking — auch nach Erwartungs-/Intent-LLM.
+
+    Budget: max. 2 LLM-Calls pro Suche (Profil-LLM + optional Rerank).
    """
    if not include_llm_rank:
        return False
-    if query_intent_applied or llm_expectation_applied:
+    if len(hits) < 3:
        return False
-    if scenario == SCENARIO_PRESET_NEXT:
+    if not hybrid_ranking_ambiguous(hits):
        return False
+
    q = _normalize_query(query)
-    if not q:
-        return False
+    profile_llm = query_intent_applied or llm_expectation_applied
+
+    if scenario == SCENARIO_PRESET_NEXT:
+        return has_planning_reference
+
+    if scenario == SCENARIO_FREE_SEARCH:
+        if len(q) < 10 and not profile_llm:
+            return False
+        return True
+
    if scenario == SCENARIO_ADDITIVE:
-        return len(q) >= 12 and not deterministic_rank_confident(hits)
-    if len(q) < 22:
-        return False
-    return not deterministic_rank_confident(hits)
+        return len(q) >= 8 or profile_llm
+
+    if profile_llm:
+        return True
+    return len(q) >= 14


 def _recalculate_skill_gap(target: PlanningTargetProfile) -> PlanningTargetProfile:
@ -420,4 +457,5 @@ __all__ = [
    "should_run_llm_intent_pipeline",
    "should_run_llm_rank_pipeline",
    "deterministic_rank_confident",
+    "hybrid_ranking_ambiguous",
 ]
--- a/backend/tests/test_planning_exercise_suggest.py
+++ b/backend/tests/test_planning_exercise_suggest.py
@ -56,11 +56,11 @@ def test_should_skip_llm_intent_short_free_search():
    )


-def test_should_skip_llm_rank_when_intent_already_applied():
+def test_should_run_llm_rank_when_intent_applied_and_ambiguous():
    from planning_exercise_target_pipeline import SCENARIO_ADDITIVE, should_run_llm_rank_pipeline

    hits = [{"score": 0.5}, {"score": 0.48}, {"score": 0.47}, {"score": 0.46}]
-    assert not should_run_llm_rank_pipeline(
+    assert should_run_llm_rank_pipeline(
        "Baut auf dem Plan auf und trainiert zusätzlich Schnellkraft mit Partner",
        SCENARIO_ADDITIVE,
        include_llm_rank=True,
@ -69,6 +69,36 @@ def test_should_skip_llm_rank_when_intent_already_applied():
    )


+def test_should_skip_llm_rank_when_ranking_confident():
+    from planning_exercise_target_pipeline import SCENARIO_PRESET_NEXT, should_run_llm_rank_pipeline
+
+    hits = [{"score": 0.9}, {"score": 0.5}, {"score": 0.4}, {"score": 0.3}]
+    assert not should_run_llm_rank_pipeline(
+        "",
+        SCENARIO_PRESET_NEXT,
+        include_llm_rank=True,
+        query_intent_applied=False,
+        llm_expectation_applied=True,
+        has_planning_reference=True,
+        hits=hits,
+    )
+
+
+def test_should_run_llm_rank_for_preset_when_ambiguous():
+    from planning_exercise_target_pipeline import SCENARIO_PRESET_NEXT, should_run_llm_rank_pipeline
+
+    hits = [{"score": 0.42}, {"score": 0.41}, {"score": 0.4}, {"score": 0.39}]
+    assert should_run_llm_rank_pipeline(
+        "",
+        SCENARIO_PRESET_NEXT,
+        include_llm_rank=True,
+        query_intent_applied=False,
+        llm_expectation_applied=True,
+        has_planning_reference=True,
+        hits=hits,
+    )
+
+
 def test_compose_retrieval_phase():
    assert compose_retrieval_phase(query_intent=False, llm_rank=False) == "profile_v1"
    assert compose_retrieval_phase(query_intent=True, llm_rank=True) == "profile_v1+query_intent+llm_rank"
@ -99,10 +129,10 @@ def test_should_run_llm_expectation_for_preset_with_planning_ref():
    )


-def test_should_skip_llm_rank_when_expectation_applied():
+def test_should_skip_llm_rank_when_expectation_applied_but_confident():
    from planning_exercise_target_pipeline import SCENARIO_PRESET_NEXT, should_run_llm_rank_pipeline

-    hits = [{"score": 0.5}, {"score": 0.48}, {"score": 0.47}, {"score": 0.46}]
+    hits = [{"score": 0.85}, {"score": 0.4}, {"score": 0.35}, {"score": 0.3}]
    assert not should_run_llm_rank_pipeline(
        "",
        SCENARIO_PRESET_NEXT,
--- a/backend/version.py
+++ b/backend/version.py
@ -1,6 +1,6 @@
 # Shinkan Jinkendo Version Information

-APP_VERSION = "0.8.181"
+APP_VERSION = "0.8.182"
 BUILD_DATE = "2026-05-23"
 DB_SCHEMA_VERSION = "20260531074"

@ -29,7 +29,7 @@ MODULE_VERSIONS = {
    "skill_profiles": "1.0.0",  # Phase 3: gewichtetes Fähigkeiten-Profil + skill-discovery/suggestions
    "methods": "0.1.0",
    "exercises": "2.37.0",  # Planungs-KI P1: Szenario-Pipeline + Query-Intent-Overlay
-    "planning_exercise_suggest": "0.9.0",  # Phase B: Text-Signale guidance/Rahmen-Ziele; requires_partner-Filter
+    "planning_exercise_suggest": "0.10.0",  # Phase B2: Rerank bei engem Top-Feld, auch nach Profil-LLM
    "training_units": "0.4.0",  # POST .../publish-to-framework: Ablauf aus geplanter Einheit → Rahmen-Slot-Blueprint
    "training_programs": "0.1.0",
    "planning": "0.15.0",  # Vorlagen: Strukturvorschau, Bearbeiten inkl. Split-Sessions + Beschreibung
@ -44,6 +44,14 @@ MODULE_VERSIONS = {
 }

 CHANGELOG = [
+    {
+        "version": "0.8.182",
+        "date": "2026-05-23",
+        "changes": [
+            "Planungs-KI Phase B2: LLM-Rerank bei engem Top-Feld — auch nach Erwartungs-/Intent-LLM (max. 2 Calls).",
+            "Preset „Nächste aus Kontext“: Rerank wenn Ranking unklar; Frontend sendet include_llm_rank immer.",
+        ],
+    },
    {
        "version": "0.8.181",
        "date": "2026-05-23",
--- a/frontend/src/components/ExercisePickerModal.jsx
+++ b/frontend/src/components/ExercisePickerModal.jsx
@ -31,7 +31,6 @@ const PAGE_SIZE = 100
 const PLANNING_SUGGEST_LIMIT = 50
 /** Client-Hinweis — Backend entscheidet final über LLM-Gates (max. 1 Call). */
 const PLANNING_LLM_INTENT_MIN_CHARS = 10
-const PLANNING_LLM_RANK_MIN_CHARS = 24
 const LEVEL_FILTER_OPTS = SKILL_LEVEL_OPTIONS.filter((o) => o.level != null)

 const INITIAL_FILTERS = { ...INITIAL_EXERCISE_LIST_FILTERS }
@ -460,7 +459,7 @@ export default function ExercisePickerModal({
            : undefined,
        include_llm_intent:
          query.length >= PLANNING_LLM_INTENT_MIN_CHARS || !(query || '').trim(),
-        include_llm_rank: query.length >= PLANNING_LLM_RANK_MIN_CHARS,
+        include_llm_rank: true,
        query,
        intent_hint:
          activePlanningContext.intentHint || (useFreePlanningSearch && query ? 'free_search' : null),