From 50aff849d89bc007fb209e3d0bcfdcffd138986d Mon Sep 17 00:00:00 2001
From: Lars <Lars@stommer.de>
Date: Sat, 23 May 2026 10:28:03 +0200
Subject: [PATCH] Enhance Planning Exercise Suggestion and Ranking Logic

- Introduced a new function `hybrid_ranking_ambiguous` that reports whether the top candidates' scores lie too close together (by default: top-vs-4th gap below 0.08, or top-vs-10th gap below 0.055), i.e. whether an LLM rerank is worthwhile.
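- Updated `should_run_llm_rank_pipeline` to rerank only when the hybrid ranking is ambiguous, now also after the intent/expectation LLM has run (budget: at most 2 LLM calls per search) and for the preset-next scenario when a planning reference exists; added the `has_planning_reference` parameter (default `True`) and lowered the per-scenario minimum query lengths.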
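- Adjusted the frontend (`ExercisePickerModal.jsx`) to always send `include_llm_rank: true` and removed the client-side `PLANNING_LLM_RANK_MIN_CHARS` threshold, so the backend gate alone decides whether to rerank, independent of query length.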
- Incremented the app version to 0.8.182, bumped the `planning_exercise_suggest` module version from 0.9.0 to 0.10.0, and added the matching changelog entry.
---
 backend/planning_exercise_suggest.py          |  1 +
 backend/planning_exercise_target_pipeline.py  | 60 +++++++++++++++----
 .../tests/test_planning_exercise_suggest.py   | 38 ++++++++++--
 backend/version.py                            | 12 +++-
 .../src/components/ExercisePickerModal.jsx    |  3 +-
 5 files changed, 95 insertions(+), 19 deletions(-)

diff --git a/backend/planning_exercise_suggest.py b/backend/planning_exercise_suggest.py
index dc5f148..4085b22 100644
--- a/backend/planning_exercise_suggest.py
+++ b/backend/planning_exercise_suggest.py
@@ -662,6 +662,7 @@ def suggest_planning_exercises(
         include_llm_rank=body.include_llm_rank,
         query_intent_applied=query_intent_applied,
         llm_expectation_applied=llm_expectation_applied,
+        has_planning_reference=has_plan_ref,
         hits=hits,
     )
     if run_llm_rank:
diff --git a/backend/planning_exercise_target_pipeline.py b/backend/planning_exercise_target_pipeline.py
index ac4b403..3846c34 100644
--- a/backend/planning_exercise_target_pipeline.py
+++ b/backend/planning_exercise_target_pipeline.py
@@ -10,7 +10,7 @@ Ablauf:
 from __future__ import annotations
 
 import re
-from typing import Any, Dict, List, Mapping, Optional, Tuple
+from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple
 
 from planning_exercise_expectation import try_build_planning_expectation_from_context
 from planning_exercise_intent import (
@@ -135,6 +135,31 @@ def deterministic_rank_confident(hits: Sequence[Mapping[str, Any]], *, gap_thres
     return (top - fourth) >= gap_threshold
 
 
+def hybrid_ranking_ambiguous(
+    hits: Sequence[Mapping[str, Any]],
+    *,
+    top_four_gap: float = 0.08,
+    top_ten_gap: float = 0.055,
+) -> bool:
+    """True wenn Top-Kandidaten scores zu nah beieinander liegen — Rerank lohnt sich."""
+    if len(hits) < 3:
+        return False
+    top = float(hits[0].get("score") or 0.0)
+    if len(hits) >= 4:
+        fourth = float(hits[3].get("score") or 0.0)
+        if (top - fourth) < top_four_gap:
+            return True
+    if len(hits) >= 10:
+        tenth = float(hits[9].get("score") or 0.0)
+        if (top - tenth) < top_ten_gap:
+            return True
+    elif len(hits) >= 2:
+        tail = float(hits[min(len(hits) - 1, 9)].get("score") or 0.0)
+        if (top - tail) < top_four_gap:
+            return True
+    return False
+
+
 def should_run_llm_rank_pipeline(
     query: Optional[str],
     scenario: str,
@@ -142,26 +167,38 @@ def should_run_llm_rank_pipeline(
     include_llm_rank: bool,
     query_intent_applied: bool,
     llm_expectation_applied: bool = False,
+    has_planning_reference: bool = True,
     hits: Sequence[Mapping[str, Any]],
 ) -> bool:
     """
-    Maximal ein LLM-Call pro Request: wenn Intent- oder Erwartungs-LLM lief, kein Rerank.
-    Rerank nur bei längerer, komplexer Anfrage und unklarem Hybrid-Ranking.
+    Phase B2: Rerank bei unklarem Hybrid-Ranking — auch nach Erwartungs-/Intent-LLM.
+
+    Budget: max. 2 LLM-Calls pro Suche (Profil-LLM + optional Rerank).
     """
     if not include_llm_rank:
         return False
-    if query_intent_applied or llm_expectation_applied:
+    if len(hits) < 3:
         return False
-    if scenario == SCENARIO_PRESET_NEXT:
+    if not hybrid_ranking_ambiguous(hits):
         return False
+
     q = _normalize_query(query)
-    if not q:
-        return False
+    profile_llm = query_intent_applied or llm_expectation_applied
+
+    if scenario == SCENARIO_PRESET_NEXT:
+        return has_planning_reference
+
+    if scenario == SCENARIO_FREE_SEARCH:
+        if len(q) < 10 and not profile_llm:
+            return False
+        return True
+
     if scenario == SCENARIO_ADDITIVE:
-        return len(q) >= 12 and not deterministic_rank_confident(hits)
-    if len(q) < 22:
-        return False
-    return not deterministic_rank_confident(hits)
+        return len(q) >= 8 or profile_llm
+
+    if profile_llm:
+        return True
+    return len(q) >= 14
 
 
 def _recalculate_skill_gap(target: PlanningTargetProfile) -> PlanningTargetProfile:
@@ -420,4 +457,5 @@ __all__ = [
     "should_run_llm_intent_pipeline",
     "should_run_llm_rank_pipeline",
     "deterministic_rank_confident",
+    "hybrid_ranking_ambiguous",
 ]
diff --git a/backend/tests/test_planning_exercise_suggest.py b/backend/tests/test_planning_exercise_suggest.py
index 4f5a951..34d026e 100644
--- a/backend/tests/test_planning_exercise_suggest.py
+++ b/backend/tests/test_planning_exercise_suggest.py
@@ -56,11 +56,11 @@ def test_should_skip_llm_intent_short_free_search():
     )
 
 
-def test_should_skip_llm_rank_when_intent_already_applied():
+def test_should_run_llm_rank_when_intent_applied_and_ambiguous():
     from planning_exercise_target_pipeline import SCENARIO_ADDITIVE, should_run_llm_rank_pipeline
 
     hits = [{"score": 0.5}, {"score": 0.48}, {"score": 0.47}, {"score": 0.46}]
-    assert not should_run_llm_rank_pipeline(
+    assert should_run_llm_rank_pipeline(
         "Baut auf dem Plan auf und trainiert zusätzlich Schnellkraft mit Partner",
         SCENARIO_ADDITIVE,
         include_llm_rank=True,
@@ -69,6 +69,36 @@ def test_should_skip_llm_rank_when_intent_already_applied():
     )
 
 
+def test_should_skip_llm_rank_when_ranking_confident():
+    from planning_exercise_target_pipeline import SCENARIO_PRESET_NEXT, should_run_llm_rank_pipeline
+
+    hits = [{"score": 0.9}, {"score": 0.5}, {"score": 0.4}, {"score": 0.3}]
+    assert not should_run_llm_rank_pipeline(
+        "",
+        SCENARIO_PRESET_NEXT,
+        include_llm_rank=True,
+        query_intent_applied=False,
+        llm_expectation_applied=True,
+        has_planning_reference=True,
+        hits=hits,
+    )
+
+
+def test_should_run_llm_rank_for_preset_when_ambiguous():
+    from planning_exercise_target_pipeline import SCENARIO_PRESET_NEXT, should_run_llm_rank_pipeline
+
+    hits = [{"score": 0.42}, {"score": 0.41}, {"score": 0.4}, {"score": 0.39}]
+    assert should_run_llm_rank_pipeline(
+        "",
+        SCENARIO_PRESET_NEXT,
+        include_llm_rank=True,
+        query_intent_applied=False,
+        llm_expectation_applied=True,
+        has_planning_reference=True,
+        hits=hits,
+    )
+
+
 def test_compose_retrieval_phase():
     assert compose_retrieval_phase(query_intent=False, llm_rank=False) == "profile_v1"
     assert compose_retrieval_phase(query_intent=True, llm_rank=True) == "profile_v1+query_intent+llm_rank"
@@ -99,10 +129,10 @@ def test_should_run_llm_expectation_for_preset_with_planning_ref():
     )
 
 
-def test_should_skip_llm_rank_when_expectation_applied():
+def test_should_skip_llm_rank_when_expectation_applied_but_confident():
     from planning_exercise_target_pipeline import SCENARIO_PRESET_NEXT, should_run_llm_rank_pipeline
 
-    hits = [{"score": 0.5}, {"score": 0.48}, {"score": 0.47}, {"score": 0.46}]
+    hits = [{"score": 0.85}, {"score": 0.4}, {"score": 0.35}, {"score": 0.3}]
     assert not should_run_llm_rank_pipeline(
         "",
         SCENARIO_PRESET_NEXT,
diff --git a/backend/version.py b/backend/version.py
index 62b9181..ae4b50f 100644
--- a/backend/version.py
+++ b/backend/version.py
@@ -1,6 +1,6 @@
 # Shinkan Jinkendo Version Information
 
-APP_VERSION = "0.8.181"
+APP_VERSION = "0.8.182"
 BUILD_DATE = "2026-05-23"
 DB_SCHEMA_VERSION = "20260531074"
 
@@ -29,7 +29,7 @@ MODULE_VERSIONS = {
     "skill_profiles": "1.0.0",  # Phase 3: gewichtetes Fähigkeiten-Profil + skill-discovery/suggestions
     "methods": "0.1.0",
     "exercises": "2.37.0",  # Planungs-KI P1: Szenario-Pipeline + Query-Intent-Overlay
-    "planning_exercise_suggest": "0.9.0",  # Phase B: Text-Signale guidance/Rahmen-Ziele; requires_partner-Filter
+    "planning_exercise_suggest": "0.10.0",  # Phase B2: Rerank bei engem Top-Feld, auch nach Profil-LLM
     "training_units": "0.4.0",  # POST .../publish-to-framework: Ablauf aus geplanter Einheit → Rahmen-Slot-Blueprint
     "training_programs": "0.1.0",
     "planning": "0.15.0",  # Vorlagen: Strukturvorschau, Bearbeiten inkl. Split-Sessions + Beschreibung
@@ -44,6 +44,14 @@ MODULE_VERSIONS = {
 }
 
 CHANGELOG = [
+    {
+        "version": "0.8.182",
+        "date": "2026-05-23",
+        "changes": [
+            "Planungs-KI Phase B2: LLM-Rerank bei engem Top-Feld — auch nach Erwartungs-/Intent-LLM (max. 2 Calls).",
+            "Preset „Nächste aus Kontext“: Rerank wenn Ranking unklar; Frontend sendet include_llm_rank immer.",
+        ],
+    },
     {
         "version": "0.8.181",
         "date": "2026-05-23",
diff --git a/frontend/src/components/ExercisePickerModal.jsx b/frontend/src/components/ExercisePickerModal.jsx
index 6269d0a..ad53453 100644
--- a/frontend/src/components/ExercisePickerModal.jsx
+++ b/frontend/src/components/ExercisePickerModal.jsx
@@ -31,7 +31,6 @@ const PAGE_SIZE = 100
 const PLANNING_SUGGEST_LIMIT = 50
 /** Client-Hinweis — Backend entscheidet final über LLM-Gates (max. 1 Call). */
 const PLANNING_LLM_INTENT_MIN_CHARS = 10
-const PLANNING_LLM_RANK_MIN_CHARS = 24
 const LEVEL_FILTER_OPTS = SKILL_LEVEL_OPTIONS.filter((o) => o.level != null)
 
 const INITIAL_FILTERS = { ...INITIAL_EXERCISE_LIST_FILTERS }
@@ -460,7 +459,7 @@ export default function ExercisePickerModal({
             : undefined,
         include_llm_intent:
           query.length >= PLANNING_LLM_INTENT_MIN_CHARS || !(query || '').trim(),
-        include_llm_rank: query.length >= PLANNING_LLM_RANK_MIN_CHARS,
+        include_llm_rank: true,
         query,
         intent_hint:
           activePlanningContext.intentHint || (useFreePlanningSearch && query ? 'free_search' : null),