Enhance Progression Path Evaluation and Comparison Logic
All checks were successful
Deploy Development / deploy (push) Successful in 45s
Test Suite / pytest-backend (push) Successful in 45s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 15s
Test Suite / k6 /health Baseline (push) Successful in 38s
Test Suite / playwright-tests (push) Successful in 1m23s
All checks were successful
Deploy Development / deploy (push) Successful in 45s
Test Suite / pytest-backend (push) Successful in 45s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 15s
Test Suite / k6 /health Baseline (push) Successful in 38s
Test Suite / playwright-tests (push) Successful in 1m23s
- Introduced `_steps_to_evaluate_payloads` to convert path steps into evaluation payloads for improved quality assessments.
- Updated `_build_progression_compare_response` to accept a new `proposed_eval` parameter, allowing for fair quality assessment comparisons.
- Enhanced `ProgressionGraphEditor` to utilize the new pipeline quality assessment data.
- Modified `ProgressionOptimizeCompareModal` to display detailed comparison results, including handling of trivial slot differences and optimization hints.
- Bumped version to reflect the new features and improvements.
This commit is contained in:
parent
5bca5ef9eb
commit
e828a5da32
|
|
@ -2176,6 +2176,52 @@ def _steps_by_major_index(steps: Sequence[Mapping[str, Any]]) -> Dict[int, Dict[
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _steps_to_evaluate_payloads(steps: Sequence[Mapping[str, Any]]) -> List[EvaluateStepPayload]:
|
||||||
|
"""Pfad-Schritte → evaluate_steps (für faire QS auf dem End-Stand)."""
|
||||||
|
payloads: List[EvaluateStepPayload] = []
|
||||||
|
for step in steps or []:
|
||||||
|
if not isinstance(step, dict):
|
||||||
|
continue
|
||||||
|
midx = step.get("roadmap_major_step_index")
|
||||||
|
if midx is None:
|
||||||
|
continue
|
||||||
|
eid = step.get("exercise_id")
|
||||||
|
is_proposal = bool(step.get("is_ai_proposal")) or eid is None
|
||||||
|
payloads.append(
|
||||||
|
EvaluateStepPayload(
|
||||||
|
exercise_id=int(eid) if eid is not None and not is_proposal else None,
|
||||||
|
variant_id=step.get("variant_id"),
|
||||||
|
title=step.get("title"),
|
||||||
|
is_ai_proposal=is_proposal,
|
||||||
|
ai_suggestion=step.get("ai_suggestion") if isinstance(step.get("ai_suggestion"), dict) else None,
|
||||||
|
proposal_key=step.get("proposal_key"),
|
||||||
|
roadmap_major_step_index=int(midx),
|
||||||
|
roadmap_phase=step.get("roadmap_phase"),
|
||||||
|
roadmap_learning_goal=step.get("roadmap_learning_goal"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
payloads.sort(key=lambda p: int(p.roadmap_major_step_index or 0))
|
||||||
|
return payloads
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_slot_title(title: Optional[str]) -> str:
|
||||||
|
return (title or "").strip().casefold()
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_trivial_slot_diffs(diffs: Sequence[Mapping[str, Any]]) -> List[Dict[str, Any]]:
|
||||||
|
"""Gleicher sichtbarer Titel = kein inhaltlicher Wechsel (nur ID-Doppel in der Bibliothek)."""
|
||||||
|
out: List[Dict[str, Any]] = []
|
||||||
|
for raw in diffs or []:
|
||||||
|
if not isinstance(raw, dict):
|
||||||
|
continue
|
||||||
|
bt = _normalize_slot_title(raw.get("baseline_title"))
|
||||||
|
pt = _normalize_slot_title(raw.get("proposed_title"))
|
||||||
|
if bt and pt and bt == pt:
|
||||||
|
continue
|
||||||
|
out.append(dict(raw))
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
def _build_progression_slot_diffs(
|
def _build_progression_slot_diffs(
|
||||||
baseline_steps: Sequence[Mapping[str, Any]],
|
baseline_steps: Sequence[Mapping[str, Any]],
|
||||||
proposed_steps: Sequence[Mapping[str, Any]],
|
proposed_steps: Sequence[Mapping[str, Any]],
|
||||||
|
|
@ -2211,24 +2257,35 @@ def _build_progression_slot_diffs(
|
||||||
def _build_progression_compare_response(
|
def _build_progression_compare_response(
|
||||||
baseline: Mapping[str, Any],
|
baseline: Mapping[str, Any],
|
||||||
proposed: Mapping[str, Any],
|
proposed: Mapping[str, Any],
|
||||||
|
*,
|
||||||
|
proposed_eval: Optional[Mapping[str, Any]] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
baseline_steps = list(baseline.get("steps") or [])
|
baseline_steps = list(baseline.get("steps") or [])
|
||||||
proposed_steps = list(proposed.get("steps") or [])
|
proposed_steps = list(proposed.get("steps") or [])
|
||||||
baseline_qa = baseline.get("path_qa") if isinstance(baseline.get("path_qa"), dict) else {}
|
baseline_qa = baseline.get("path_qa") if isinstance(baseline.get("path_qa"), dict) else {}
|
||||||
proposed_qa = proposed.get("path_qa") if isinstance(proposed.get("path_qa"), dict) else {}
|
pipeline_qa = proposed.get("path_qa") if isinstance(proposed.get("path_qa"), dict) else {}
|
||||||
slot_diffs = _build_progression_slot_diffs(baseline_steps, proposed_steps)
|
fair_qa = (
|
||||||
|
proposed_eval.get("path_qa")
|
||||||
|
if isinstance(proposed_eval, dict) and isinstance(proposed_eval.get("path_qa"), dict)
|
||||||
|
else pipeline_qa
|
||||||
|
)
|
||||||
|
slot_diffs = _filter_trivial_slot_diffs(
|
||||||
|
_build_progression_slot_diffs(baseline_steps, proposed_steps),
|
||||||
|
)
|
||||||
return {
|
return {
|
||||||
**dict(proposed),
|
**dict(proposed),
|
||||||
"comparison_mode": True,
|
"comparison_mode": True,
|
||||||
"baseline_steps": baseline_steps,
|
"baseline_steps": baseline_steps,
|
||||||
"baseline_path_qa": baseline_qa,
|
"baseline_path_qa": baseline_qa,
|
||||||
"proposed_steps": proposed_steps,
|
"proposed_steps": proposed_steps,
|
||||||
"proposed_path_qa": proposed_qa,
|
"proposed_path_qa": fair_qa,
|
||||||
|
"proposed_path_qa_pipeline": pipeline_qa,
|
||||||
"slot_diffs": slot_diffs,
|
"slot_diffs": slot_diffs,
|
||||||
"slot_diff_count": len(slot_diffs),
|
"slot_diff_count": len(slot_diffs),
|
||||||
"baseline_quality_score": _path_qa_quality_score(baseline_qa),
|
"baseline_quality_score": _path_qa_quality_score(baseline_qa),
|
||||||
"proposed_quality_score": _path_qa_quality_score(proposed_qa),
|
"proposed_quality_score": _path_qa_quality_score(fair_qa),
|
||||||
"path_qa": proposed_qa,
|
"proposed_pipeline_quality_score": _path_qa_quality_score(pipeline_qa),
|
||||||
|
"path_qa": fair_qa,
|
||||||
"steps": proposed_steps,
|
"steps": proposed_steps,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2273,7 +2330,21 @@ def suggest_progression_path(
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
proposed = suggest_progression_path(cur, tenant=tenant, body=proposed_body)
|
proposed = suggest_progression_path(cur, tenant=tenant, body=proposed_body)
|
||||||
return _build_progression_compare_response(baseline, proposed)
|
proposed_eval_payloads = _steps_to_evaluate_payloads(proposed.get("steps") or [])
|
||||||
|
proposed_eval: Optional[Dict[str, Any]] = None
|
||||||
|
if proposed_eval_payloads:
|
||||||
|
proposed_eval_body = body.model_copy(
|
||||||
|
update={
|
||||||
|
"evaluate_only": True,
|
||||||
|
"evaluate_steps": proposed_eval_payloads,
|
||||||
|
"compare_with_assignments": False,
|
||||||
|
"include_llm_intent": False,
|
||||||
|
"auto_rematch_after_qa": False,
|
||||||
|
"include_roadmap_preview": False,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
proposed_eval = suggest_progression_path(cur, tenant=tenant, body=proposed_eval_body)
|
||||||
|
return _build_progression_compare_response(baseline, proposed, proposed_eval=proposed_eval)
|
||||||
|
|
||||||
goal_query = _normalize_query(body.query)
|
goal_query = _normalize_query(body.query)
|
||||||
if len(goal_query) < 3:
|
if len(goal_query) < 3:
|
||||||
|
|
|
||||||
47
backend/tests/test_planning_compare_slot_diffs.py
Normal file
47
backend/tests/test_planning_compare_slot_diffs.py
Normal file
|
|
@ -0,0 +1,47 @@
|
||||||
|
"""Tests Vergleichs-Diffs (triviale ID-Tausche ausfiltern)."""
|
||||||
|
from planning_exercise_path_builder import (
|
||||||
|
_build_progression_slot_diffs,
|
||||||
|
_filter_trivial_slot_diffs,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_filter_trivial_slot_diffs_same_title_different_id():
    """A slot whose title is unchanged but whose exercise ID differs is filtered out."""
    shared_title = "Rhythmuswechsel in der Kumite-Beinarbeit"
    id_only_swap = {
        "roadmap_major_step_index": 1,
        "baseline_exercise_id": 10,
        "baseline_title": shared_title,
        "proposed_exercise_id": 99,
        "proposed_title": shared_title,
    }
    assert _filter_trivial_slot_diffs([id_only_swap]) == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_filter_trivial_slot_diffs_keeps_real_title_change():
    """A slot whose visible title actually changes survives the filter."""
    real_change = {
        "roadmap_major_step_index": 1,
        "baseline_exercise_id": 10,
        "baseline_title": "Alt",
        "proposed_exercise_id": 99,
        "proposed_title": "Neu",
    }
    kept = _filter_trivial_slot_diffs([real_change])
    # Exactly one entry remains, and it carries the proposed title.
    assert [entry["proposed_title"] for entry in kept] == ["Neu"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_slot_diffs_then_filter():
    """The raw diff reports the ID swap in slot 1; filtering by title removes it."""
    unchanged_slot = {"roadmap_major_step_index": 0, "exercise_id": 1, "title": "A"}
    baseline_path = [
        dict(unchanged_slot),
        {"roadmap_major_step_index": 1, "exercise_id": 10, "title": "Gleich"},
    ]
    proposed_path = [
        dict(unchanged_slot),
        {"roadmap_major_step_index": 1, "exercise_id": 77, "title": "Gleich"},
    ]
    raw_diffs = _build_progression_slot_diffs(baseline_path, proposed_path)
    assert len(raw_diffs) == 1
    assert _filter_trivial_slot_diffs(raw_diffs) == []
|
||||||
|
|
@ -505,8 +505,9 @@ export default function ProgressionGraphEditor({ graphId, embedded = false, onSa
|
||||||
setTargetSummary(res?.target_profile_summary || null)
|
setTargetSummary(res?.target_profile_summary || null)
|
||||||
const baselineQa = res?.baseline_path_qa || null
|
const baselineQa = res?.baseline_path_qa || null
|
||||||
const proposedQa = res?.proposed_path_qa || res?.path_qa || null
|
const proposedQa = res?.proposed_path_qa || res?.path_qa || null
|
||||||
|
const pipelineQa = res?.proposed_path_qa_pipeline || null
|
||||||
setPathQa(baselineQa)
|
setPathQa(baselineQa)
|
||||||
setProposedPathQa(proposedQa)
|
setProposedPathQa(pipelineQa)
|
||||||
|
|
||||||
const openCompareDialog = (diffCount, noticePrefix) => {
|
const openCompareDialog = (diffCount, noticePrefix) => {
|
||||||
setComparePayload(res)
|
setComparePayload(res)
|
||||||
|
|
|
||||||
|
|
@ -37,11 +37,17 @@ export default function ProgressionOptimizeCompareModal({
|
||||||
const proposedQa = comparison.proposed_path_qa || comparison.path_qa
|
const proposedQa = comparison.proposed_path_qa || comparison.path_qa
|
||||||
const baselinePct = pathQaQualityPercent(baselineQa)
|
const baselinePct = pathQaQualityPercent(baselineQa)
|
||||||
const proposedPct = pathQaQualityPercent(proposedQa)
|
const proposedPct = pathQaQualityPercent(proposedQa)
|
||||||
const rematchRounds = proposedQa?.rematch_rounds
|
const pipelinePct = pathQaQualityPercent(comparison?.proposed_path_qa_pipeline)
|
||||||
const rematchCount = Array.isArray(proposedQa?.rematch_log) ? proposedQa.rematch_log.length : 0
|
const rematchRounds = comparison?.proposed_path_qa_pipeline?.rematch_rounds
|
||||||
const refineCount = Array.isArray(proposedQa?.refine_log) ? proposedQa.refine_log.length : 0
|
?? proposedQa?.rematch_rounds
|
||||||
const hintCount = Number(proposedQa?.optimization_hint_count || 0)
|
const pipelineQa = comparison?.proposed_path_qa_pipeline
|
||||||
const tierCount = Array.isArray(proposedQa?.qa_tiers) ? proposedQa.qa_tiers.length : 0
|
const rematchCount = Array.isArray(pipelineQa?.rematch_log) ? pipelineQa.rematch_log.length : 0
|
||||||
|
const refineCount = Array.isArray(pipelineQa?.refine_log) ? pipelineQa.refine_log.length : 0
|
||||||
|
const hintCount = Number(pipelineQa?.optimization_hint_count || 0)
|
||||||
|
const tierCount = Array.isArray(pipelineQa?.qa_tiers) ? pipelineQa.qa_tiers.length : 0
|
||||||
|
const noMeaningfulDiffs = slotDiffs.length === 0
|
||||||
|
const proposedNotBetter =
|
||||||
|
proposedPct != null && baselinePct != null && proposedPct <= baselinePct
|
||||||
|
|
||||||
const toggle = (midx) => {
|
const toggle = (midx) => {
|
||||||
setSelected((prev) => {
|
setSelected((prev) => {
|
||||||
|
|
@ -75,10 +81,42 @@ export default function ProgressionOptimizeCompareModal({
|
||||||
Optimierung vergleichen
|
Optimierung vergleichen
|
||||||
</h3>
|
</h3>
|
||||||
<p style={{ fontSize: '12px', color: 'var(--text3)', marginTop: 0, lineHeight: 1.45 }}>
|
<p style={{ fontSize: '12px', color: 'var(--text3)', marginTop: 0, lineHeight: 1.45 }}>
|
||||||
Links dein aktueller Pfad, rechts der Vorschlag nach vollem Match inkl. Auto-Optimierung.
|
Vergleicht deinen Pfad mit dem End-Stand nach Match — beide Seiten mit derselben Bewertungslogik
|
||||||
Wähle die Slots, die du übernehmen möchtest.
|
wie „Graph bewerten“. Auto-Rematch-Details stehen im Panel, nicht in der Prozentzahl.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
{noMeaningfulDiffs || proposedNotBetter ? (
|
||||||
|
<div
|
||||||
|
style={{
|
||||||
|
marginBottom: '12px',
|
||||||
|
padding: '10px 12px',
|
||||||
|
borderRadius: '8px',
|
||||||
|
border: '1px solid color-mix(in srgb, var(--danger) 35%, var(--border))',
|
||||||
|
background: 'color-mix(in srgb, var(--danger) 8%, var(--surface2))',
|
||||||
|
fontSize: '12px',
|
||||||
|
lineHeight: 1.45,
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{noMeaningfulDiffs ? (
|
||||||
|
<strong>Keine inhaltlichen Slot-Änderungen</strong>
|
||||||
|
) : (
|
||||||
|
<strong>Vorschlag nicht besser als dein Pfad</strong>
|
||||||
|
)}
|
||||||
|
{noMeaningfulDiffs ? (
|
||||||
|
<p style={{ margin: '6px 0 0', color: 'var(--text2)' }}>
|
||||||
|
Rematch hat höchstens dieselben Übungen unter anderen IDs getroffen — kein Grund zur
|
||||||
|
Übernahme. Bitte abbrechen.
|
||||||
|
</p>
|
||||||
|
) : (
|
||||||
|
<p style={{ margin: '6px 0 0', color: 'var(--text2)' }}>
|
||||||
|
Fair bewertet liefert der Vorschlag keinen höheren Pfad-QS-Wert. Die frühere niedrigere
|
||||||
|
Pipeline-Zahl{pipelinePct != null ? ` (${pipelinePct} %)` : ''} stammte aus dem
|
||||||
|
Rematch-Lauf, nicht aus dem sichtbaren End-Pfad.
|
||||||
|
</p>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
) : null}
|
||||||
|
|
||||||
<div
|
<div
|
||||||
style={{
|
style={{
|
||||||
display: 'grid',
|
display: 'grid',
|
||||||
|
|
@ -140,7 +178,7 @@ export default function ProgressionOptimizeCompareModal({
|
||||||
|
|
||||||
{slotDiffs.length === 0 ? (
|
{slotDiffs.length === 0 ? (
|
||||||
<p style={{ fontSize: '12px', color: 'var(--text2)' }}>
|
<p style={{ fontSize: '12px', color: 'var(--text2)' }}>
|
||||||
Keine abweichenden Slot-Zuordnungen — der optimierte Lauf liefert denselben Pfad.
|
Keine inhaltlichen Abweichungen — der End-Stand entspricht deinem Pfad.
|
||||||
</p>
|
</p>
|
||||||
) : (
|
) : (
|
||||||
<>
|
<>
|
||||||
|
|
@ -187,9 +225,11 @@ export default function ProgressionOptimizeCompareModal({
|
||||||
>
|
>
|
||||||
<span style={{ color: 'var(--text2)' }}>
|
<span style={{ color: 'var(--text2)' }}>
|
||||||
Bisher: {diff.baseline_title || '— leer —'}
|
Bisher: {diff.baseline_title || '— leer —'}
|
||||||
|
{diff.baseline_exercise_id != null ? ` (#${diff.baseline_exercise_id})` : ''}
|
||||||
</span>
|
</span>
|
||||||
<span style={{ color: 'var(--accent-dark)' }}>
|
<span style={{ color: 'var(--accent-dark)' }}>
|
||||||
Neu: {diff.proposed_title || '— leer —'}
|
Neu: {diff.proposed_title || '— leer —'}
|
||||||
|
{diff.proposed_exercise_id != null ? ` (#${diff.proposed_exercise_id})` : ''}
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
</span>
|
</span>
|
||||||
|
|
@ -208,7 +248,7 @@ export default function ProgressionOptimizeCompareModal({
|
||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
className="btn btn-primary"
|
className="btn btn-primary"
|
||||||
disabled={applying || selected.size === 0 || slotDiffs.length === 0}
|
disabled={applying || selected.size === 0 || slotDiffs.length === 0 || proposedNotBetter}
|
||||||
onClick={() => onApplySelected([...selected])}
|
onClick={() => onApplySelected([...selected])}
|
||||||
>
|
>
|
||||||
{applying ? 'Übernehmen …' : `Auswahl übernehmen (${selected.size})`}
|
{applying ? 'Übernehmen …' : `Auswahl übernehmen (${selected.size})`}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user