Enhance Path QA and Progression Review Logic
All checks were successful
Deploy Development / deploy (push) Successful in 44s
Test Suite / pytest-backend (push) Successful in 44s
Test Suite / lint-backend (push) Successful in 1s
Test Suite / build-frontend (push) Successful in 14s
Test Suite / k6 /health Baseline (push) Successful in 41s
Test Suite / playwright-tests (push) Successful in 1m27s

- Introduced `_resolve_hint_major_index` to accurately map hints to major step indices, improving the handling of optimization hints in path evaluations.
- Added `_problematic_slots_from_path_qa` to identify and categorize problematic slots based on baseline QA, enhancing the quality assessment process.
- Updated `_slot_suggestion_accepted` to incorporate new parameters for slot problems and stage specifications, refining the decision-making process for slot suggestions.
- Enhanced `ProgressionGraphEditor` to improve user notifications regarding identified issues and suggestions, ensuring clearer communication of path evaluation results.
- Modified `buildProgressionComparePayload` and `buildUnifiedSlotReviewComparePayload` to support baseline evaluations, streamlining the comparison process for proposed paths.
This commit is contained in:
Lars 2026-06-13 10:39:52 +02:00
parent a1e4ad66df
commit 3468b2066e
4 changed files with 251 additions and 29 deletions

View File

@ -2456,6 +2456,107 @@ def _off_topic_slot_indices(path_qa: Optional[Mapping[str, Any]]) -> Set[int]:
return set(_off_topic_reasons_by_slot((path_qa or {}).get("off_topic_steps") or []).keys()) return set(_off_topic_reasons_by_slot((path_qa or {}).get("off_topic_steps") or []).keys())
def _resolve_hint_major_index(
    hint: Mapping[str, Any],
    stage_specs: Sequence[StageSpecArtifact],
) -> Optional[int]:
    """Map an optimization hint to the major step index it refers to.

    Resolution order:

    1. ``hint["roadmap_major_step_index"]`` when it converts to ``int``.
    2. ``hint["step_index"]`` read as a position into ``stage_specs``; the
       ``major_step_index`` of the spec at that position is returned.
    3. A non-negative ``step_index`` that lies beyond ``stage_specs`` is
       returned unchanged.

    Args:
        hint: One entry of the QA ``optimization_hints`` list.
        stage_specs: Stage specs; only ``major_step_index`` is read.

    Returns:
        The resolved major step index, or ``None`` when neither key yields a
        usable value (missing, not convertible, or negative position).
    """
    explicit = hint.get("roadmap_major_step_index")
    if explicit is not None:
        try:
            return int(explicit)
        except (TypeError, ValueError):
            # A malformed explicit index must not discard an otherwise
            # usable positional ``step_index``; fall through to it.
            pass

    position = hint.get("step_index")
    if position is None:
        return None
    try:
        pos = int(position)
    except (TypeError, ValueError):
        return None

    if pos < 0:
        return None
    if pos < len(stage_specs):
        return int(stage_specs[pos].major_step_index)
    # Position outside the known specs: use it as the index itself.
    return pos
def _problematic_slots_from_path_qa(
    baseline_qa: Optional[Mapping[str, Any]],
    baseline_steps: Sequence[Mapping[str, Any]],
    stage_specs: Sequence[StageSpecArtifact],
) -> Dict[int, List[str]]:
    """Collect problem reasons per slot from a path QA result.

    Reasons are gathered, in this order, from:

    1. ``off_topic_steps`` (grouped via ``_off_topic_reasons_by_slot``),
    2. ``optimization_hints`` (except the actions ``review_roadmap`` and
       ``refine_stage_spec``), resolved with ``_resolve_hint_major_index``,
    3. free-text ``issues`` that mention a slot by number ("slot N" /
       "stufe N", 1-based) or contain the step title,
    4. steps that have neither an ``exercise_id`` nor ``is_ai_proposal``.

    Args:
        baseline_qa: QA payload of the baseline path; ``None`` is treated as
            empty.
        baseline_steps: Steps of the baseline path; entries that are not
            dicts or lack a usable ``roadmap_major_step_index`` are ignored.
        stage_specs: Stage specs used to resolve positional hints.

    Returns:
        Mapping of major step index to a de-duplicated list of reasons, each
        capped at 400 characters.
    """
    # Function-scope import: the module's import block is not visible here.
    import re

    qa = baseline_qa or {}
    problems: Dict[int, List[str]] = {}

    def _add(midx: int, reason: str) -> None:
        # Truncate BEFORE the duplicate check so that long reasons compare
        # against what is actually stored in the bucket.
        text = (reason or "").strip()[:400]
        if not text:
            return
        bucket = problems.setdefault(int(midx), [])
        if text not in bucket:
            bucket.append(text)

    # Parse the step indices once; both the issue matching and the
    # empty-slot check below need them.
    indexed_steps = []
    for step in baseline_steps or []:
        if not isinstance(step, dict):
            continue
        raw_idx = step.get("roadmap_major_step_index")
        if raw_idx is None:
            continue
        try:
            major_idx = int(raw_idx)
        except (TypeError, ValueError):
            continue
        indexed_steps.append((major_idx, step))

    for midx, reasons in _off_topic_reasons_by_slot(
        qa.get("off_topic_steps") or [],
    ).items():
        for reason in reasons:
            _add(midx, reason)

    for hint in qa.get("optimization_hints") or []:
        if not isinstance(hint, dict):
            continue
        action = str(hint.get("action") or "").strip().lower()
        # Roadmap-/spec-level hints are not tied to a single slot's exercise.
        if action in ("review_roadmap", "refine_stage_spec"):
            continue
        midx = _resolve_hint_major_index(hint, stage_specs)
        if midx is None:
            continue
        _add(
            midx,
            str(
                hint.get("reason")
                or hint.get("issue")
                or hint.get("title")
                or action
            ),
        )

    # Slot numbers in issue texts are 1-based. The negative lookahead keeps
    # "slot 1" from matching inside "slot 10", "slot 12", ...
    slot_matchers = [
        (
            major_idx,
            re.compile(rf"(?:slot|stufe) {major_idx + 1}(?!\d)"),
            str(step.get("title") or "").strip().lower(),
        )
        for major_idx, step in indexed_steps
    ]
    for raw in qa.get("issues") or []:
        text = str(raw or "").strip()
        if not text:
            continue
        lowered = text.lower()
        for major_idx, slot_pattern, title in slot_matchers:
            if slot_pattern.search(lowered) or (title and title in lowered):
                _add(major_idx, text)

    for major_idx, step in indexed_steps:
        if step.get("exercise_id") is None and not step.get("is_ai_proposal"):
            _add(major_idx, "Leerer Slot ohne Bibliotheks-Übung")

    return problems
def _slot_suggestion_accepted( def _slot_suggestion_accepted(
*, *,
baseline_qa: Optional[Mapping[str, Any]], baseline_qa: Optional[Mapping[str, Any]],
@ -2465,32 +2566,53 @@ def _slot_suggestion_accepted(
diff: Mapping[str, Any], diff: Mapping[str, Any],
off_topic: bool, off_topic: bool,
major_idx: int, major_idx: int,
slot_problem: bool = False,
stage_specs: Optional[Sequence[StageSpecArtifact]] = None,
baseline_steps: Optional[Sequence[Mapping[str, Any]]] = None,
projected_steps: Optional[Sequence[Mapping[str, Any]]] = None,
) -> bool: ) -> bool:
"""Entscheidet, ob ein Slot-Vorschlag in die Liste kommt.""" """Entscheidet, ob ein Slot-Vorschlag in die Liste kommt."""
base_id = diff.get("baseline_exercise_id") base_id = diff.get("baseline_exercise_id")
prop_id = diff.get("proposed_exercise_id") prop_id = diff.get("proposed_exercise_id")
base_off = _off_topic_slot_indices(baseline_qa) base_off = _off_topic_slot_indices(baseline_qa)
proj_off = _off_topic_slot_indices(projected_qa) proj_off = _off_topic_slot_indices(projected_qa)
delta = _quality_delta(baseline_score, projected_score)
if prop_id is not None and base_id is not None and int(base_id) == int(prop_id):
return False
if slot_problem and prop_id is not None:
if major_idx in base_off and major_idx not in proj_off:
return True
if delta is not None and delta >= -0.001:
return True
if stage_specs is not None:
proj_problems = _problematic_slots_from_path_qa(
projected_qa,
projected_steps or baseline_steps or [],
stage_specs,
)
if major_idx not in proj_problems:
return True
return True
if off_topic and base_id is not None: if off_topic and base_id is not None:
if major_idx in base_off and major_idx not in proj_off: if major_idx in base_off and major_idx not in proj_off:
return True return True
if major_idx in base_off and prop_id is not None: if prop_id is not None:
return _slot_diff_improves_path(diff, _quality_delta(baseline_score, projected_score), off_topic=True) return _slot_diff_improves_path(diff, delta, off_topic=True)
if base_id is None and prop_id is not None: if base_id is None and prop_id is not None:
return _slot_diff_improves_path(diff, _quality_delta(baseline_score, projected_score), off_topic=False) return _slot_diff_improves_path(diff, delta, off_topic=False)
if base_id is not None and prop_id is not None: if base_id is not None and prop_id is not None:
if int(base_id) == int(prop_id): return _slot_diff_improves_path(diff, delta, off_topic=False)
return False
return _slot_diff_improves_path(diff, _quality_delta(baseline_score, projected_score), off_topic=False)
if base_id is None and prop_id is None and diff.get("proposed_is_ai_proposal"): if base_id is None and prop_id is None and diff.get("proposed_is_ai_proposal"):
return _slot_diff_improves_path( return _slot_diff_improves_path(
diff, diff,
_quality_delta(baseline_score, projected_score), delta,
off_topic=off_topic or major_idx in base_off, off_topic=off_topic or major_idx in base_off or slot_problem,
) )
return False return False
@ -2900,6 +3022,11 @@ def _run_unified_slot_improvement_review(
baseline_score = _path_qa_quality_score(baseline_qa) baseline_score = _path_qa_quality_score(baseline_qa)
gap_fill_offers = list(qa_pack.get("gap_fill_offers") or []) gap_fill_offers = list(qa_pack.get("gap_fill_offers") or [])
off_topic_map = _off_topic_reasons_by_slot(baseline_qa.get("off_topic_steps") or []) off_topic_map = _off_topic_reasons_by_slot(baseline_qa.get("off_topic_steps") or [])
problem_slots = _problematic_slots_from_path_qa(
baseline_qa,
baseline_steps,
roadmap_ctx.stage_specs,
)
steps_by_major = _steps_by_major_index(baseline_steps) steps_by_major = _steps_by_major_index(baseline_steps)
spec_by_major = {int(s.major_step_index): s for s in roadmap_ctx.stage_specs} spec_by_major = {int(s.major_step_index): s for s in roadmap_ctx.stage_specs}
@ -2913,10 +3040,11 @@ def _run_unified_slot_improvement_review(
current = dict(steps_by_major.get(major_idx, {})) current = dict(steps_by_major.get(major_idx, {}))
current.setdefault("roadmap_major_step_index", major_idx) current.setdefault("roadmap_major_step_index", major_idx)
current_id = current.get("exercise_id") current_id = current.get("exercise_id")
off_topic = major_idx in off_topic_map or bool( slot_problem = major_idx in problem_slots
off_topic = slot_problem or major_idx in off_topic_map or bool(
current.get("slot_status") in {"off_topic", "stripped"} current.get("slot_status") in {"off_topic", "stripped"}
) )
off_reasons = off_topic_map.get(major_idx, []) off_reasons = list(problem_slots.get(major_idx, [])) + off_topic_map.get(major_idx, [])
planned_ids = [ planned_ids = [
int(s["exercise_id"]) int(s["exercise_id"])
@ -2937,7 +3065,12 @@ def _run_unified_slot_improvement_review(
anchor_variant_id = int(vid) if vid is not None else None anchor_variant_id = int(vid) if vid is not None else None
exclude_id: Optional[int] = None exclude_id: Optional[int] = None
if current_id is not None and not off_topic: if current_id is not None and not (off_topic or slot_problem):
try:
exclude_id = int(current_id)
except (TypeError, ValueError):
exclude_id = None
elif current_id is not None and (off_topic or slot_problem):
try: try:
exclude_id = int(current_id) exclude_id = int(current_id)
except (TypeError, ValueError): except (TypeError, ValueError):
@ -2972,7 +3105,7 @@ def _run_unified_slot_improvement_review(
continue continue
if ( if (
current_id is not None current_id is not None
and not off_topic and not (off_topic or slot_problem)
and int(current_id) == cand_id and int(current_id) == cand_id
): ):
continue continue
@ -3006,10 +3139,14 @@ def _run_unified_slot_improvement_review(
diff=diff_stub, diff=diff_stub,
off_topic=off_topic, off_topic=off_topic,
major_idx=major_idx, major_idx=major_idx,
slot_problem=slot_problem,
stage_specs=roadmap_ctx.stage_specs,
baseline_steps=baseline_steps,
projected_steps=merged_steps,
) )
suggestion_type = ( suggestion_type = (
"remove_and_replace" "remove_and_replace"
if off_topic and current_id is not None if (off_topic or slot_problem) and current_id is not None
else ("library_fill" if current_id is None else "library_improvement") else ("library_fill" if current_id is None else "library_improvement")
) )
entry = { entry = {
@ -3023,6 +3160,8 @@ def _run_unified_slot_improvement_review(
"projected_path_qa": projected_qa, "projected_path_qa": projected_qa,
"improves_path": improves, "improves_path": improves,
"off_topic": off_topic, "off_topic": off_topic,
"slot_problem": slot_problem,
"problem_reasons": off_reasons[:6],
"proposed_is_ai_proposal": False, "proposed_is_ai_proposal": False,
"pro_contra": _build_slot_pro_contra( "pro_contra": _build_slot_pro_contra(
current_step=current, current_step=current,
@ -3048,6 +3187,7 @@ def _run_unified_slot_improvement_review(
needs_ai = ( needs_ai = (
current_id is None current_id is None
or off_topic or off_topic
or slot_problem
or bool(current.get("is_ai_proposal")) or bool(current.get("is_ai_proposal"))
) )
if not needs_ai or not body.include_ai_gap_fill: if not needs_ai or not body.include_ai_gap_fill:
@ -3115,8 +3255,12 @@ def _run_unified_slot_improvement_review(
baseline_score=baseline_score, baseline_score=baseline_score,
projected_score=projected_score, projected_score=projected_score,
diff=diff_stub, diff=diff_stub,
off_topic=off_topic or major_idx in _off_topic_slot_indices(baseline_qa), off_topic=off_topic,
major_idx=major_idx, major_idx=major_idx,
slot_problem=slot_problem,
stage_specs=roadmap_ctx.stage_specs,
baseline_steps=baseline_steps,
projected_steps=merged_steps,
) )
entry = { entry = {
**diff_stub, **diff_stub,
@ -3127,8 +3271,10 @@ def _run_unified_slot_improvement_review(
"projected_quality_score": projected_score, "projected_quality_score": projected_score,
"baseline_quality_score": baseline_score, "baseline_quality_score": baseline_score,
"projected_path_qa": projected_qa, "projected_path_qa": projected_qa,
"improves_path": improves, "improves_path": improves or slot_problem,
"off_topic": off_topic, "off_topic": off_topic,
"slot_problem": slot_problem,
"problem_reasons": off_reasons[:6],
"proposed_is_ai_proposal": True, "proposed_is_ai_proposal": True,
"gap_offer": slot_offer, "gap_offer": slot_offer,
"pro_contra": _build_slot_pro_contra( "pro_contra": _build_slot_pro_contra(
@ -3143,12 +3289,16 @@ def _run_unified_slot_improvement_review(
gap_offer=slot_offer, gap_offer=slot_offer,
), ),
} }
if improves: if improves or slot_problem:
entry["improves_path"] = True
suggestions.append(entry) suggestions.append(entry)
else: else:
rejected.append(entry) rejected.append(entry)
improvement_diffs = [_suggestion_as_slot_diff(s) for s in suggestions] improvement_diffs = [_suggestion_as_slot_diff(s) for s in suggestions]
problem_slot_payload = {
str(k): v for k, v in sorted(problem_slots.items(), key=lambda x: x[0])
}
slot_diff_scoring = { slot_diff_scoring = {
"baseline_quality_score": baseline_score, "baseline_quality_score": baseline_score,
"scored_diffs": improvement_diffs + [_suggestion_as_slot_diff(r) for r in rejected], "scored_diffs": improvement_diffs + [_suggestion_as_slot_diff(r) for r in rejected],
@ -3182,9 +3332,11 @@ def _run_unified_slot_improvement_review(
"unified_slot_review": True, "unified_slot_review": True,
"suggestion_count": len(suggestions), "suggestion_count": len(suggestions),
"rejected_count": len(rejected), "rejected_count": len(rejected),
"problem_slot_count": len(problem_slots),
}, },
"retrieval_phase": "unified_slot_review", "retrieval_phase": "unified_slot_review",
"unified_slot_review": True, "unified_slot_review": True,
"problem_slots": problem_slot_payload,
"slot_suggestions": suggestions, "slot_suggestions": suggestions,
"slot_diff_scoring": slot_diff_scoring, "slot_diff_scoring": slot_diff_scoring,
"comparison_mode": True, "comparison_mode": True,

View File

@ -0,0 +1,53 @@
"""Schachstellen-Erkennung für unified Slot-Review."""
from planning_exercise_path_builder import (
_problematic_slots_from_path_qa,
_slot_suggestion_accepted,
)
from planning_progression_roadmap import StageSpecArtifact
def _spec(midx: int) -> StageSpecArtifact:
    """Build a minimal stage spec for major step ``midx`` (test helper)."""
    slot_no = midx + 1
    spec_fields = {
        "major_step_index": midx,
        "learning_goal": f"Lernziel Slot {slot_no}",
        "load_profile": [],
        "exercise_type": "",
        "success_criteria": [],
        "anti_patterns": [],
    }
    return StageSpecArtifact(**spec_fields)
def test_problematic_slots_from_optimization_hints():
    """A positional rematch hint must flag the slot it points at."""
    rematch_hint = {
        "action": "rematch_slot",
        "step_index": 1,
        "issue": "stage_mismatch",
        "reason": "Übung passt nicht zur Stufe",
    }
    path_qa = {"optimization_hints": [rematch_hint], "off_topic_steps": []}
    path_steps = [
        {"roadmap_major_step_index": idx, "exercise_id": exercise_id, "title": title}
        for idx, (exercise_id, title) in enumerate([(1, "A"), (2, "B")])
    ]
    stage_specs = [_spec(idx) for idx in range(2)]

    found = _problematic_slots_from_path_qa(path_qa, path_steps, stage_specs)

    assert 1 in found
    assert any("Stufe" in reason or "passt" in reason for reason in found[1])
def test_slot_suggestion_accepted_for_problem_slot():
    """A replacement for a flagged slot is accepted even at equal score."""
    exercise_swap = {"baseline_exercise_id": 10, "proposed_exercise_id": 99}
    qa_before = {
        "optimization_hints": [
            {"action": "rematch_slot", "roadmap_major_step_index": 1},
        ],
    }
    qa_after = {"optimization_hints": []}

    accepted = _slot_suggestion_accepted(
        baseline_qa=qa_before,
        projected_qa=qa_after,
        baseline_score=0.7,
        projected_score=0.7,
        diff=exercise_swap,
        off_topic=False,
        major_idx=1,
        slot_problem=True,
    )

    assert accepted

View File

@ -494,20 +494,31 @@ export default function ProgressionGraphEditor({ graphId, embedded = false, onSa
} }
const runMatchCompareFlow = async (synced, { source = 'match' } = {}) => { const runMatchCompareFlow = async (synced, { source = 'match' } = {}) => {
setMatchNotice('Pfad bewerten und je Slot passende Verbesserungen prüfen…') setMatchNotice('Schritt 1/2: Pfad bewerten (wie „Graph bewerten“)…')
const baselineRes = await fetchPathEvaluate(synced)
const { draft: evaluated, remainingOffers } = applyEvaluateResult(synced, baselineRes)
setDraft(evaluated)
const mergedAfterEval = mergeGapOffersForDraft(evaluated, baselineRes)
setGapFillOffers(mergedAfterEval.length > 0 ? mergedAfterEval : remainingOffers)
setMatchNotice('Schritt 2/2: Verbesserungsvorschläge für gemeldete Schachstellen…')
const reviewRes = await api.suggestProgressionPath({ const reviewRes = await api.suggestProgressionPath({
...buildMatchRequestBase(synced), ...buildEvaluateRequest(synced),
evaluate_only: false,
unified_slot_review: true, unified_slot_review: true,
baseline_evaluate_steps: slotsToEvaluateSteps(synced), baseline_evaluate_steps: slotsToEvaluateSteps(synced),
include_llm_intent: false, include_llm_intent: false,
auto_rematch_after_qa: false, auto_rematch_after_qa: false,
}) })
const qa = reviewRes?.path_qa || null
setPathQa(qa)
setDraft((prev) => (prev ? { ...prev, lastFindings: qa } : prev))
const compareRes = buildProgressionComparePayload(null, reviewRes) if (!reviewRes?.unified_slot_review) {
setGapFillOffers(mergeGapOffersForDraft(synced, reviewRes, reviewRes)) throw new Error(
'Match-Review nicht verfügbar — Backend-Stand prüfen (unified_slot_review fehlt in der Antwort).',
)
}
const compareRes = buildProgressionComparePayload(baselineRes, reviewRes)
setGapFillOffers(mergeGapOffersForDraft(evaluated, baselineRes, reviewRes))
presentMatchCompare(compareRes, { source }) presentMatchCompare(compareRes, { source })
return compareRes return compareRes
} }
@ -523,11 +534,15 @@ export default function ProgressionGraphEditor({ graphId, embedded = false, onSa
const baselineQa = res?.baseline_path_qa || null const baselineQa = res?.baseline_path_qa || null
const diffCount = res?.slot_diff_count ?? compareDiffsForDialog(res).length const diffCount = res?.slot_diff_count ?? compareDiffsForDialog(res).length
const rejectedCount = res?.slot_diff_count_rejected ?? rejectedCompareDiffs(res).length const rejectedCount = res?.slot_diff_count_rejected ?? rejectedCompareDiffs(res).length
const problemCount = res?.match_summary?.problem_slot_count
?? (res?.problem_slots ? Object.keys(res.problem_slots).length : 0)
const bPct = pathQaQualityPercent(baselineQa) const bPct = pathQaQualityPercent(baselineQa)
let notice = let notice =
diffCount > 0 diffCount > 0
? `Match: ${diffCount} Verbesserung(en) — je Slot gegen deinen Pfad (${bPct != null ? `${bPct} %` : 'QS'}) geprüft.` ? `Match: ${diffCount} Verbesserung(en) für gemeldete Schachstellen.`
: 'Match: Keine messbare Verbesserung gegenüber deinem Pfad.' : problemCount > 0
? `Match: ${problemCount} Schachstelle(n) erkannt, aber kein Bibliotheks-Ersatz mit Gewinn — KI-Angebote im Panel prüfen.`
: 'Match: Keine Schachstellen — Pfad wirkt konsistent.'
if (rejectedCount > 0) { if (rejectedCount > 0) {
notice += ` ${rejectedCount} Vorschlag/Vorschläge verworfen (Verschlechterung oder neutral).` notice += ` ${rejectedCount} Vorschlag/Vorschläge verworfen (Verschlechterung oder neutral).`
} }

View File

@ -1091,7 +1091,7 @@ function mergeGapFillOffersFromSteps(steps, offers) {
*/ */
export function buildProgressionComparePayload(baselineRes, proposedRes) { export function buildProgressionComparePayload(baselineRes, proposedRes) {
if (proposedRes?.unified_slot_review) { if (proposedRes?.unified_slot_review) {
return buildUnifiedSlotReviewComparePayload(proposedRes) return buildUnifiedSlotReviewComparePayload(proposedRes, baselineRes)
} }
const baselineSteps = Array.isArray(baselineRes?.steps) ? baselineRes.steps : [] const baselineSteps = Array.isArray(baselineRes?.steps) ? baselineRes.steps : []
@ -1154,9 +1154,11 @@ export function buildProgressionComparePayload(baselineRes, proposedRes) {
} }
/** Einheitlicher Match-Review (Bewertung + Slot-Vorschläge in einem Lauf). */ /** Einheitlicher Match-Review (Bewertung + Slot-Vorschläge in einem Lauf). */
export function buildUnifiedSlotReviewComparePayload(res) { export function buildUnifiedSlotReviewComparePayload(res, baselineRes = null) {
const baselineSteps = Array.isArray(res?.baseline_steps) ? res.baseline_steps : (res?.steps || []) const baselineSteps = Array.isArray(baselineRes?.steps)
const baselineQa = res?.baseline_path_qa || res?.path_qa || null ? baselineRes.steps
: (Array.isArray(res?.baseline_steps) ? res.baseline_steps : (res?.steps || []))
const baselineQa = baselineRes?.path_qa || res?.baseline_path_qa || res?.path_qa || null
const scoring = res?.slot_diff_scoring const scoring = res?.slot_diff_scoring
const suggestions = Array.isArray(res?.slot_suggestions) ? res.slot_suggestions : [] const suggestions = Array.isArray(res?.slot_suggestions) ? res.slot_suggestions : []
const improving = suggestions.filter((s) => s?.improves_path) const improving = suggestions.filter((s) => s?.improves_path)