From e828a5da32e180757b53bc4c9a5119ffa38f02ff Mon Sep 17 00:00:00 2001 From: Lars Date: Sat, 13 Jun 2026 07:44:01 +0200 Subject: [PATCH] Enhance Progression Path Evaluation and Comparison Logic - Introduced `_steps_to_evaluate_payloads` to convert path steps into evaluation payloads for improved quality assessments. - Updated `_build_progression_compare_response` to include a new `proposed_eval` parameter, allowing for fair quality assessment comparisons. - Enhanced `ProgressionGraphEditor` to utilize the new pipeline quality assessment data. - Modified `ProgressionOptimizeCompareModal` to display detailed comparison results, including handling of trivial slot differences and optimization hints. - Bumped version to reflect the new features and improvements. --- backend/planning_exercise_path_builder.py | 83 +++++++++++++++++-- .../tests/test_planning_compare_slot_diffs.py | 47 +++++++++++ .../src/components/ProgressionGraphEditor.jsx | 3 +- .../ProgressionOptimizeCompareModal.jsx | 58 +++++++++++-- 4 files changed, 175 insertions(+), 16 deletions(-) create mode 100644 backend/tests/test_planning_compare_slot_diffs.py diff --git a/backend/planning_exercise_path_builder.py b/backend/planning_exercise_path_builder.py index 795360c..8084c99 100644 --- a/backend/planning_exercise_path_builder.py +++ b/backend/planning_exercise_path_builder.py @@ -2176,6 +2176,52 @@ def _steps_by_major_index(steps: Sequence[Mapping[str, Any]]) -> Dict[int, Dict[ return out +def _steps_to_evaluate_payloads(steps: Sequence[Mapping[str, Any]]) -> List[EvaluateStepPayload]: + """Pfad-Schritte → evaluate_steps (für faire QS auf dem End-Stand).""" + payloads: List[EvaluateStepPayload] = [] + for step in steps or []: + if not isinstance(step, dict): + continue + midx = step.get("roadmap_major_step_index") + if midx is None: + continue + eid = step.get("exercise_id") + is_proposal = bool(step.get("is_ai_proposal")) or eid is None + payloads.append( + EvaluateStepPayload( + exercise_id=int(eid) if eid is not None and not is_proposal else None, + variant_id=step.get("variant_id"), + title=step.get("title"), + is_ai_proposal=is_proposal, + ai_suggestion=step.get("ai_suggestion") if isinstance(step.get("ai_suggestion"), dict) else None, + proposal_key=step.get("proposal_key"), + roadmap_major_step_index=int(midx), + roadmap_phase=step.get("roadmap_phase"), + roadmap_learning_goal=step.get("roadmap_learning_goal"), + ) + ) + payloads.sort(key=lambda p: int(p.roadmap_major_step_index or 0)) + return payloads + + +def _normalize_slot_title(title: Optional[str]) -> str: + return (title or "").strip().casefold() + + +def _filter_trivial_slot_diffs(diffs: Sequence[Mapping[str, Any]]) -> List[Dict[str, Any]]: + """Gleicher sichtbarer Titel = kein inhaltlicher Wechsel (nur ID-Doppel in der Bibliothek).""" + out: List[Dict[str, Any]] = [] + for raw in diffs or []: + if not isinstance(raw, dict): + continue + bt = _normalize_slot_title(raw.get("baseline_title")) + pt = _normalize_slot_title(raw.get("proposed_title")) + if bt and pt and bt == pt: + continue + out.append(dict(raw)) + return out + + def _build_progression_slot_diffs( baseline_steps: Sequence[Mapping[str, Any]], proposed_steps: Sequence[Mapping[str, Any]], @@ -2211,24 +2257,35 @@ def _build_progression_slot_diffs( def _build_progression_compare_response( baseline: Mapping[str, Any], proposed: Mapping[str, Any], + *, + proposed_eval: Optional[Mapping[str, Any]] = None, ) -> Dict[str, Any]: baseline_steps = list(baseline.get("steps") or []) proposed_steps = list(proposed.get("steps") or []) baseline_qa = baseline.get("path_qa") if isinstance(baseline.get("path_qa"), dict) else {} - proposed_qa = proposed.get("path_qa") if isinstance(proposed.get("path_qa"), dict) else {} - slot_diffs = _build_progression_slot_diffs(baseline_steps, proposed_steps) + pipeline_qa = proposed.get("path_qa") if isinstance(proposed.get("path_qa"), dict) else {} + fair_qa = ( + proposed_eval.get("path_qa") + if isinstance(proposed_eval, dict) and isinstance(proposed_eval.get("path_qa"), dict) + else pipeline_qa + ) + slot_diffs = _filter_trivial_slot_diffs( + _build_progression_slot_diffs(baseline_steps, proposed_steps), + ) return { **dict(proposed), "comparison_mode": True, "baseline_steps": baseline_steps, "baseline_path_qa": baseline_qa, "proposed_steps": proposed_steps, - "proposed_path_qa": proposed_qa, + "proposed_path_qa": fair_qa, + "proposed_path_qa_pipeline": pipeline_qa, "slot_diffs": slot_diffs, "slot_diff_count": len(slot_diffs), "baseline_quality_score": _path_qa_quality_score(baseline_qa), - "proposed_quality_score": _path_qa_quality_score(proposed_qa), - "path_qa": proposed_qa, + "proposed_quality_score": _path_qa_quality_score(fair_qa), + "proposed_pipeline_quality_score": _path_qa_quality_score(pipeline_qa), + "path_qa": fair_qa, "steps": proposed_steps, } @@ -2273,7 +2330,21 @@ def suggest_progression_path( } ) proposed = suggest_progression_path(cur, tenant=tenant, body=proposed_body) - return _build_progression_compare_response(baseline, proposed) + proposed_eval_payloads = _steps_to_evaluate_payloads(proposed.get("steps") or []) + proposed_eval: Optional[Dict[str, Any]] = None + if proposed_eval_payloads: + proposed_eval_body = body.model_copy( + update={ + "evaluate_only": True, + "evaluate_steps": proposed_eval_payloads, + "compare_with_assignments": False, + "include_llm_intent": False, + "auto_rematch_after_qa": False, + "include_roadmap_preview": False, + } + ) + proposed_eval = suggest_progression_path(cur, tenant=tenant, body=proposed_eval_body) + return _build_progression_compare_response(baseline, proposed, proposed_eval=proposed_eval) goal_query = _normalize_query(body.query) if len(goal_query) < 3: diff --git a/backend/tests/test_planning_compare_slot_diffs.py b/backend/tests/test_planning_compare_slot_diffs.py new file mode 100644 index 0000000..cef4d85 --- /dev/null +++ b/backend/tests/test_planning_compare_slot_diffs.py @@ -0,0 +1,47 @@ +"""Tests Vergleichs-Diffs (triviale ID-Tausche ausfiltern).""" +from planning_exercise_path_builder import ( + _build_progression_slot_diffs, + _filter_trivial_slot_diffs, +) + + +def test_filter_trivial_slot_diffs_same_title_different_id(): + diffs = [ + { + "roadmap_major_step_index": 1, + "baseline_exercise_id": 10, + "baseline_title": "Rhythmuswechsel in der Kumite-Beinarbeit", + "proposed_exercise_id": 99, + "proposed_title": "Rhythmuswechsel in der Kumite-Beinarbeit", + } + ] + assert _filter_trivial_slot_diffs(diffs) == [] + + +def test_filter_trivial_slot_diffs_keeps_real_title_change(): + diffs = [ + { + "roadmap_major_step_index": 1, + "baseline_exercise_id": 10, + "baseline_title": "Alt", + "proposed_exercise_id": 99, + "proposed_title": "Neu", + } + ] + filtered = _filter_trivial_slot_diffs(diffs) + assert len(filtered) == 1 + assert filtered[0]["proposed_title"] == "Neu" + + +def test_build_slot_diffs_then_filter(): + baseline = [ + {"roadmap_major_step_index": 0, "exercise_id": 1, "title": "A"}, + {"roadmap_major_step_index": 1, "exercise_id": 10, "title": "Gleich"}, + ] + proposed = [ + {"roadmap_major_step_index": 0, "exercise_id": 1, "title": "A"}, + {"roadmap_major_step_index": 1, "exercise_id": 77, "title": "Gleich"}, + ] + raw = _build_progression_slot_diffs(baseline, proposed) + assert len(raw) == 1 + assert _filter_trivial_slot_diffs(raw) == [] diff --git a/frontend/src/components/ProgressionGraphEditor.jsx b/frontend/src/components/ProgressionGraphEditor.jsx index 40caac1..785cccd 100644 --- a/frontend/src/components/ProgressionGraphEditor.jsx +++ b/frontend/src/components/ProgressionGraphEditor.jsx @@ -505,8 +505,9 @@ export default function ProgressionGraphEditor({ graphId, embedded = false, onSa setTargetSummary(res?.target_profile_summary || null) const baselineQa = res?.baseline_path_qa || null const proposedQa = res?.proposed_path_qa || res?.path_qa || null + const pipelineQa = res?.proposed_path_qa_pipeline || null setPathQa(baselineQa) - setProposedPathQa(proposedQa) + setProposedPathQa(pipelineQa) const openCompareDialog = (diffCount, noticePrefix) => { setComparePayload(res) diff --git a/frontend/src/components/ProgressionOptimizeCompareModal.jsx b/frontend/src/components/ProgressionOptimizeCompareModal.jsx index 864ebdf..648dae7 100644 --- a/frontend/src/components/ProgressionOptimizeCompareModal.jsx +++ b/frontend/src/components/ProgressionOptimizeCompareModal.jsx @@ -37,11 +37,17 @@ export default function ProgressionOptimizeCompareModal({ const proposedQa = comparison.proposed_path_qa || comparison.path_qa const baselinePct = pathQaQualityPercent(baselineQa) const proposedPct = pathQaQualityPercent(proposedQa) - const rematchRounds = proposedQa?.rematch_rounds - const rematchCount = Array.isArray(proposedQa?.rematch_log) ? proposedQa.rematch_log.length : 0 - const refineCount = Array.isArray(proposedQa?.refine_log) ? proposedQa.refine_log.length : 0 - const hintCount = Number(proposedQa?.optimization_hint_count || 0) - const tierCount = Array.isArray(proposedQa?.qa_tiers) ? proposedQa.qa_tiers.length : 0 + const pipelinePct = pathQaQualityPercent(comparison?.proposed_path_qa_pipeline) + const rematchRounds = comparison?.proposed_path_qa_pipeline?.rematch_rounds + ?? proposedQa?.rematch_rounds + const pipelineQa = comparison?.proposed_path_qa_pipeline + const rematchCount = Array.isArray(pipelineQa?.rematch_log) ? pipelineQa.rematch_log.length : 0 + const refineCount = Array.isArray(pipelineQa?.refine_log) ? pipelineQa.refine_log.length : 0 + const hintCount = Number(pipelineQa?.optimization_hint_count || 0) + const tierCount = Array.isArray(pipelineQa?.qa_tiers) ? pipelineQa.qa_tiers.length : 0 + const noMeaningfulDiffs = slotDiffs.length === 0 + const proposedNotBetter = + proposedPct != null && baselinePct != null && proposedPct <= baselinePct const toggle = (midx) => { setSelected((prev) => { @@ -75,10 +81,42 @@ export default function ProgressionOptimizeCompareModal({ Optimierung vergleichen

- Links dein aktueller Pfad, rechts der Vorschlag nach vollem Match inkl. Auto-Optimierung. - Wähle die Slots, die du übernehmen möchtest. + Vergleicht deinen Pfad mit dem End-Stand nach Match — beide Seiten mit derselben Bewertungslogik + wie „Graph bewerten“. Auto-Rematch-Details stehen im Panel, nicht in der Prozentzahl.

+ {noMeaningfulDiffs || proposedNotBetter ? ( +
+ {noMeaningfulDiffs ? ( + Keine inhaltlichen Slot-Änderungen + ) : ( + Vorschlag nicht besser als dein Pfad + )} + {noMeaningfulDiffs ? ( +

+ Rematch hat höchstens dieselben Übungen unter anderen IDs getroffen — kein Grund zur + Übernahme. Bitte abbrechen. +

+ ) : ( +

+ Fair bewertet liefert der Vorschlag keinen höheren Pfad-QS-Wert. Die frühere niedrigere + Pipeline-Zahl{pipelinePct != null ? ` (${pipelinePct} %)` : ''} stammte aus dem + Rematch-Lauf, nicht aus dem sichtbaren End-Pfad. +

+ )} +
+ ) : null} +
- Keine abweichenden Slot-Zuordnungen — der optimierte Lauf liefert denselben Pfad. + Keine inhaltlichen Abweichungen — der End-Stand entspricht deinem Pfad.

) : ( <> @@ -187,9 +225,11 @@ export default function ProgressionOptimizeCompareModal({ > Bisher: {diff.baseline_title || '— leer —'} + {diff.baseline_exercise_id != null ? ` (#${diff.baseline_exercise_id})` : ''} Neu: {diff.proposed_title || '— leer —'} + {diff.proposed_exercise_id != null ? ` (#${diff.proposed_exercise_id})` : ''}
@@ -208,7 +248,7 @@ export default function ProgressionOptimizeCompareModal({