Enhance Progression Path Evaluation and Comparison Logic
All checks were successful
Deploy Development / deploy (push) Successful in 45s
Test Suite / pytest-backend (push) Successful in 45s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 15s
Test Suite / k6 /health Baseline (push) Successful in 38s
Test Suite / playwright-tests (push) Successful in 1m23s

- Introduced `_steps_to_evaluate_payloads` to convert path steps into evaluation payloads for improved quality assessments.
- Updated `_build_progression_compare_response` to include a new `proposed_eval` parameter, allowing for fair quality assessment comparisons.
- Enhanced `ProgressionGraphEditor` to utilize the new pipeline quality assessment data.
- Modified `ProgressionOptimizeCompareModal` to display detailed comparison results, including handling of trivial slot differences and optimization hints.
- Bumped version to reflect the new features and improvements.
This commit is contained in:
Lars 2026-06-13 07:44:01 +02:00
parent 5bca5ef9eb
commit e828a5da32
4 changed files with 175 additions and 16 deletions

View File

@ -2176,6 +2176,52 @@ def _steps_by_major_index(steps: Sequence[Mapping[str, Any]]) -> Dict[int, Dict[
return out
def _steps_to_evaluate_payloads(steps: Sequence[Mapping[str, Any]]) -> List[EvaluateStepPayload]:
"""Pfad-Schritte → evaluate_steps (für faire QS auf dem End-Stand)."""
payloads: List[EvaluateStepPayload] = []
for step in steps or []:
if not isinstance(step, dict):
continue
midx = step.get("roadmap_major_step_index")
if midx is None:
continue
eid = step.get("exercise_id")
is_proposal = bool(step.get("is_ai_proposal")) or eid is None
payloads.append(
EvaluateStepPayload(
exercise_id=int(eid) if eid is not None and not is_proposal else None,
variant_id=step.get("variant_id"),
title=step.get("title"),
is_ai_proposal=is_proposal,
ai_suggestion=step.get("ai_suggestion") if isinstance(step.get("ai_suggestion"), dict) else None,
proposal_key=step.get("proposal_key"),
roadmap_major_step_index=int(midx),
roadmap_phase=step.get("roadmap_phase"),
roadmap_learning_goal=step.get("roadmap_learning_goal"),
)
)
payloads.sort(key=lambda p: int(p.roadmap_major_step_index or 0))
return payloads
def _normalize_slot_title(title: Optional[str]) -> str:
return (title or "").strip().casefold()
def _filter_trivial_slot_diffs(diffs: Sequence[Mapping[str, Any]]) -> List[Dict[str, Any]]:
"""Gleicher sichtbarer Titel = kein inhaltlicher Wechsel (nur ID-Doppel in der Bibliothek)."""
out: List[Dict[str, Any]] = []
for raw in diffs or []:
if not isinstance(raw, dict):
continue
bt = _normalize_slot_title(raw.get("baseline_title"))
pt = _normalize_slot_title(raw.get("proposed_title"))
if bt and pt and bt == pt:
continue
out.append(dict(raw))
return out
def _build_progression_slot_diffs(
baseline_steps: Sequence[Mapping[str, Any]],
proposed_steps: Sequence[Mapping[str, Any]],
@ -2211,24 +2257,35 @@ def _build_progression_slot_diffs(
def _build_progression_compare_response(
    baseline: Mapping[str, Any],
    proposed: Mapping[str, Any],
    *,
    proposed_eval: Optional[Mapping[str, Any]] = None,
) -> Dict[str, Any]:
    """Assemble the baseline-vs-proposal comparison payload.

    Args:
        baseline: Result for the current path; its ``steps`` and ``path_qa``
            are read.
        proposed: Result of the proposal run; all of its keys are copied into
            the response and its ``path_qa`` is reported as the pipeline QA.
        proposed_eval: Optional separate evaluation of the proposed steps.
            When it is a dict holding a dict ``path_qa``, that QA is reported
            as the proposal's QA; otherwise the pipeline QA is used.

    Returns:
        A dict containing every key of ``proposed`` plus the comparison
        fields: ``comparison_mode``, both step lists, ``baseline_path_qa``,
        ``proposed_path_qa``, ``proposed_path_qa_pipeline``, the filtered
        ``slot_diffs`` with ``slot_diff_count``, the three quality scores,
        and ``path_qa`` / ``steps`` set to the proposal's values.
    """
    baseline_steps = list(baseline.get("steps") or [])
    proposed_steps = list(proposed.get("steps") or [])

    baseline_qa = baseline.get("path_qa")
    if not isinstance(baseline_qa, dict):
        baseline_qa = {}

    pipeline_qa = proposed.get("path_qa")
    if not isinstance(pipeline_qa, dict):
        pipeline_qa = {}

    eval_qa = proposed_eval.get("path_qa") if isinstance(proposed_eval, dict) else None
    fair_qa = eval_qa if isinstance(eval_qa, dict) else pipeline_qa

    # Only the filtered diffs are reported; the unfiltered list is not kept.
    slot_diffs = _filter_trivial_slot_diffs(
        _build_progression_slot_diffs(baseline_steps, proposed_steps),
    )

    # Explicit keys come after the ``proposed`` spread so they override any
    # same-named key copied from it. Each key appears exactly once.
    return {
        **dict(proposed),
        "comparison_mode": True,
        "baseline_steps": baseline_steps,
        "baseline_path_qa": baseline_qa,
        "proposed_steps": proposed_steps,
        "proposed_path_qa": fair_qa,
        "proposed_path_qa_pipeline": pipeline_qa,
        "slot_diffs": slot_diffs,
        "slot_diff_count": len(slot_diffs),
        "baseline_quality_score": _path_qa_quality_score(baseline_qa),
        "proposed_quality_score": _path_qa_quality_score(fair_qa),
        "proposed_pipeline_quality_score": _path_qa_quality_score(pipeline_qa),
        "path_qa": fair_qa,
        "steps": proposed_steps,
    }
@ -2273,7 +2330,21 @@ def suggest_progression_path(
}
)
proposed = suggest_progression_path(cur, tenant=tenant, body=proposed_body)
return _build_progression_compare_response(baseline, proposed)
proposed_eval_payloads = _steps_to_evaluate_payloads(proposed.get("steps") or [])
proposed_eval: Optional[Dict[str, Any]] = None
if proposed_eval_payloads:
proposed_eval_body = body.model_copy(
update={
"evaluate_only": True,
"evaluate_steps": proposed_eval_payloads,
"compare_with_assignments": False,
"include_llm_intent": False,
"auto_rematch_after_qa": False,
"include_roadmap_preview": False,
}
)
proposed_eval = suggest_progression_path(cur, tenant=tenant, body=proposed_eval_body)
return _build_progression_compare_response(baseline, proposed, proposed_eval=proposed_eval)
goal_query = _normalize_query(body.query)
if len(goal_query) < 3:

View File

@ -0,0 +1,47 @@
"""Tests Vergleichs-Diffs (triviale ID-Tausche ausfiltern)."""
from planning_exercise_path_builder import (
_build_progression_slot_diffs,
_filter_trivial_slot_diffs,
)
def test_filter_trivial_slot_diffs_same_title_different_id():
    """A diff with different exercise IDs but the same title is filtered out."""
    id_swap_only = {
        "roadmap_major_step_index": 1,
        "baseline_exercise_id": 10,
        "baseline_title": "Rhythmuswechsel in der Kumite-Beinarbeit",
        "proposed_exercise_id": 99,
        "proposed_title": "Rhythmuswechsel in der Kumite-Beinarbeit",
    }
    remaining = _filter_trivial_slot_diffs([id_swap_only])
    assert remaining == []
def test_filter_trivial_slot_diffs_keeps_real_title_change():
    """A diff whose titles differ survives the filter."""
    title_change = {
        "roadmap_major_step_index": 1,
        "baseline_exercise_id": 10,
        "baseline_title": "Alt",
        "proposed_exercise_id": 99,
        "proposed_title": "Neu",
    }
    kept = _filter_trivial_slot_diffs([title_change])
    assert len(kept) == 1
    assert kept[0]["proposed_title"] == "Neu"
def test_build_slot_diffs_then_filter():
    """Build diffs from two paths, then filter: an ID-only swap leaves nothing."""
    unchanged_slot = {"roadmap_major_step_index": 0, "exercise_id": 1, "title": "A"}
    current_path = [
        dict(unchanged_slot),
        {"roadmap_major_step_index": 1, "exercise_id": 10, "title": "Gleich"},
    ]
    suggested_path = [
        dict(unchanged_slot),
        {"roadmap_major_step_index": 1, "exercise_id": 77, "title": "Gleich"},
    ]
    unfiltered = _build_progression_slot_diffs(current_path, suggested_path)
    # Only slot 1 differs (by exercise ID), so exactly one raw diff is expected.
    assert len(unfiltered) == 1
    assert _filter_trivial_slot_diffs(unfiltered) == []

View File

@ -505,8 +505,9 @@ export default function ProgressionGraphEditor({ graphId, embedded = false, onSa
setTargetSummary(res?.target_profile_summary || null)
const baselineQa = res?.baseline_path_qa || null
const proposedQa = res?.proposed_path_qa || res?.path_qa || null
const pipelineQa = res?.proposed_path_qa_pipeline || null
setPathQa(baselineQa)
setProposedPathQa(proposedQa)
setProposedPathQa(pipelineQa)
const openCompareDialog = (diffCount, noticePrefix) => {
setComparePayload(res)

View File

@ -37,11 +37,17 @@ export default function ProgressionOptimizeCompareModal({
const proposedQa = comparison.proposed_path_qa || comparison.path_qa
const baselinePct = pathQaQualityPercent(baselineQa)
const proposedPct = pathQaQualityPercent(proposedQa)
const rematchRounds = proposedQa?.rematch_rounds
const rematchCount = Array.isArray(proposedQa?.rematch_log) ? proposedQa.rematch_log.length : 0
const refineCount = Array.isArray(proposedQa?.refine_log) ? proposedQa.refine_log.length : 0
const hintCount = Number(proposedQa?.optimization_hint_count || 0)
const tierCount = Array.isArray(proposedQa?.qa_tiers) ? proposedQa.qa_tiers.length : 0
const pipelinePct = pathQaQualityPercent(comparison?.proposed_path_qa_pipeline)
const rematchRounds = comparison?.proposed_path_qa_pipeline?.rematch_rounds
?? proposedQa?.rematch_rounds
const pipelineQa = comparison?.proposed_path_qa_pipeline
const rematchCount = Array.isArray(pipelineQa?.rematch_log) ? pipelineQa.rematch_log.length : 0
const refineCount = Array.isArray(pipelineQa?.refine_log) ? pipelineQa.refine_log.length : 0
const hintCount = Number(pipelineQa?.optimization_hint_count || 0)
const tierCount = Array.isArray(pipelineQa?.qa_tiers) ? pipelineQa.qa_tiers.length : 0
const noMeaningfulDiffs = slotDiffs.length === 0
const proposedNotBetter =
proposedPct != null && baselinePct != null && proposedPct <= baselinePct
const toggle = (midx) => {
setSelected((prev) => {
@ -75,10 +81,42 @@ export default function ProgressionOptimizeCompareModal({
Optimierung vergleichen
</h3>
<p style={{ fontSize: '12px', color: 'var(--text3)', marginTop: 0, lineHeight: 1.45 }}>
Links dein aktueller Pfad, rechts der Vorschlag nach vollem Match inkl. Auto-Optimierung.
Wähle die Slots, die du übernehmen möchtest.
Vergleicht deinen Pfad mit dem End-Stand nach Match beide Seiten mit derselben Bewertungslogik
wie Graph bewerten. Auto-Rematch-Details stehen im Panel, nicht in der Prozentzahl.
</p>
{noMeaningfulDiffs || proposedNotBetter ? (
<div
style={{
marginBottom: '12px',
padding: '10px 12px',
borderRadius: '8px',
border: '1px solid color-mix(in srgb, var(--danger) 35%, var(--border))',
background: 'color-mix(in srgb, var(--danger) 8%, var(--surface2))',
fontSize: '12px',
lineHeight: 1.45,
}}
>
{noMeaningfulDiffs ? (
<strong>Keine inhaltlichen Slot-Änderungen</strong>
) : (
<strong>Vorschlag nicht besser als dein Pfad</strong>
)}
{noMeaningfulDiffs ? (
<p style={{ margin: '6px 0 0', color: 'var(--text2)' }}>
Rematch hat höchstens dieselben Übungen unter anderen IDs getroffen kein Grund zur
Übernahme. Bitte abbrechen.
</p>
) : (
<p style={{ margin: '6px 0 0', color: 'var(--text2)' }}>
Fair bewertet liefert der Vorschlag keinen höheren Pfad-QS-Wert. Die frühere niedrigere
Pipeline-Zahl{pipelinePct != null ? ` (${pipelinePct} %)` : ''} stammte aus dem
Rematch-Lauf, nicht aus dem sichtbaren End-Pfad.
</p>
)}
</div>
) : null}
<div
style={{
display: 'grid',
@ -140,7 +178,7 @@ export default function ProgressionOptimizeCompareModal({
{slotDiffs.length === 0 ? (
<p style={{ fontSize: '12px', color: 'var(--text2)' }}>
Keine abweichenden Slot-Zuordnungen der optimierte Lauf liefert denselben Pfad.
Keine inhaltlichen Abweichungen der End-Stand entspricht deinem Pfad.
</p>
) : (
<>
@ -187,9 +225,11 @@ export default function ProgressionOptimizeCompareModal({
>
<span style={{ color: 'var(--text2)' }}>
Bisher: {diff.baseline_title || '— leer —'}
{diff.baseline_exercise_id != null ? ` (#${diff.baseline_exercise_id})` : ''}
</span>
<span style={{ color: 'var(--accent-dark)' }}>
Neu: {diff.proposed_title || '— leer —'}
{diff.proposed_exercise_id != null ? ` (#${diff.proposed_exercise_id})` : ''}
</span>
</div>
</span>
@ -208,7 +248,7 @@ export default function ProgressionOptimizeCompareModal({
<button
type="button"
className="btn btn-primary"
disabled={applying || selected.size === 0 || slotDiffs.length === 0}
disabled={applying || selected.size === 0 || slotDiffs.length === 0 || proposedNotBetter}
onClick={() => onApplySelected([...selected])}
>
{applying ? 'Übernehmen …' : `Auswahl übernehmen (${selected.size})`}