Enhance Path QA and Progression Review Logic

- Introduced `_resolve_hint_major_index` to accurately map hints to major step indices, improving the handling of optimization hints in path evaluations.
- Added `_problematic_slots_from_path_qa` to identify and categorize problematic slots based on baseline QA, enhancing the quality assessment process.
- Updated `_slot_suggestion_accepted` to incorporate new parameters for slot problems and stage specifications, refining the decision-making process for slot suggestions.
- Enhanced `ProgressionGraphEditor` to improve user notifications regarding identified issues and suggestions, ensuring clearer communication of path evaluation results.
- Modified `buildProgressionComparePayload` and `buildUnifiedSlotReviewComparePayload` to support baseline evaluations, streamlining the comparison process for proposed paths.
2026-06-13 10:39:52 +02:00 · 2026-06-13 10:39:52 +02:00 · 3468b2066e
commit 3468b2066e
parent a1e4ad66df
4 changed files with 251 additions and 29 deletions
--- a/backend/planning_exercise_path_builder.py
+++ b/backend/planning_exercise_path_builder.py
@ -2456,6 +2456,107 @@ def _off_topic_slot_indices(path_qa: Optional[Mapping[str, Any]]) -> Set[int]:
    return set(_off_topic_reasons_by_slot((path_qa or {}).get("off_topic_steps") or []).keys())
 def _resolve_hint_major_index(
    hint: Mapping[str, Any],
    stage_specs: Sequence[StageSpecArtifact],
 ) -> Optional[int]:
    raw = hint.get("roadmap_major_step_index")
    if raw is not None:
        try:
            return int(raw)
        except (TypeError, ValueError):
            return None
    step_index = hint.get("step_index")
    if step_index is None:
        return None
    try:
        pos = int(step_index)
    except (TypeError, ValueError):
        return None
    if 0 <= pos < len(stage_specs):
        return int(stage_specs[pos].major_step_index)
    return pos if pos >= 0 else None
 def _problematic_slots_from_path_qa(
     baseline_qa: Optional[Mapping[str, Any]],
     baseline_steps: Sequence[Mapping[str, Any]],
     stage_specs: Sequence[StageSpecArtifact],
 ) -> Dict[int, List[str]]:
     """Collect problem slots and their reasons from a baseline path QA.

     Four sources are merged, in this order:

     1. ``off_topic_steps`` (via ``_off_topic_reasons_by_slot``).
     2. ``optimization_hints``, except the roadmap-level actions
        ``review_roadmap`` and ``refine_stage_spec``; each hint is mapped to a
        slot with ``_resolve_hint_major_index``.
     3. Free-text ``issues`` that mention a step by "slot N" / "stufe N"
        (N is the 1-based slot number) or by the step title.
     4. Baseline steps that have neither an ``exercise_id`` nor the
        ``is_ai_proposal`` flag (empty slots).

     Args:
         baseline_qa: Path-QA mapping; ``None`` is treated as empty.
         baseline_steps: Current path steps; entries that are not dicts or
             lack a parsable ``roadmap_major_step_index`` are ignored.
         stage_specs: Stage specs used to resolve hint positions.

     Returns:
         Mapping of major step index to a de-duplicated list of reason texts,
         each at most 400 characters long.
     """
     import re  # local import: the file's import block is not visible here

     qa = baseline_qa or {}
     problems: Dict[int, List[str]] = {}

     def _add(midx: int, reason: str) -> None:
         # Truncate BEFORE the duplicate check so that it compares against
         # what is actually stored; otherwise a reason longer than 400
         # characters would be appended again on every call.
         text = (reason or "").strip()[:400]
         if not text:
             return
         bucket = problems.setdefault(int(midx), [])
         if text not in bucket:
             bucket.append(text)

     # 1) Off-topic findings.
     off_topic = _off_topic_reasons_by_slot(qa.get("off_topic_steps") or [])
     for midx, reasons in off_topic.items():
         for reason in reasons:
             _add(midx, reason)

     # 2) Slot-level optimization hints.
     for hint in qa.get("optimization_hints") or []:
         if not isinstance(hint, dict):
             continue
         action = str(hint.get("action") or "").strip().lower()
         if action in ("review_roadmap", "refine_stage_spec"):
             continue
         midx = _resolve_hint_major_index(hint, stage_specs)
         if midx is None:
             continue
         _add(
             midx,
             str(
                 hint.get("reason")
                 or hint.get("issue")
                 or hint.get("title")
                 or action
             ),
         )

     # Parse every usable step once; reused by the issue matching and the
     # empty-slot check below.
     indexed_steps = []
     for step in baseline_steps or []:
         if not isinstance(step, dict):
             continue
         raw_idx = step.get("roadmap_major_step_index")
         if raw_idx is None:
             continue
         try:
             major_idx = int(raw_idx)
         except (TypeError, ValueError):
             continue
         # The negative lookahead keeps "slot 1" from matching "slot 10".
         slot_ref = re.compile(
             r"(?:slot|stufe) " + re.escape(str(major_idx + 1)) + r"(?!\d)"
         )
         title = str(step.get("title") or "").strip().lower()
         indexed_steps.append((major_idx, slot_ref, title, step))

     # 3) Free-text issues that reference a slot number or a step title.
     for raw in qa.get("issues") or []:
         text = str(raw or "").strip()
         if not text:
             continue
         lowered = text.lower()
         for major_idx, slot_ref, title, _step in indexed_steps:
             if slot_ref.search(lowered) or (title and title in lowered):
                 _add(major_idx, text)

     # 4) Empty slots: no library exercise and not an AI proposal.
     for major_idx, _slot_ref, _title, step in indexed_steps:
         if step.get("exercise_id") is None and not step.get("is_ai_proposal"):
             _add(major_idx, "Leerer Slot ohne Bibliotheks-Übung")

     return problems
 def _slot_suggestion_accepted(
    *,
    baseline_qa: Optional[Mapping[str, Any]],
@ -2465,32 +2566,53 @@ def _slot_suggestion_accepted(
    diff: Mapping[str, Any],
    off_topic: bool,
    major_idx: int,
    slot_problem: bool = False,
    stage_specs: Optional[Sequence[StageSpecArtifact]] = None,
    baseline_steps: Optional[Sequence[Mapping[str, Any]]] = None,
    projected_steps: Optional[Sequence[Mapping[str, Any]]] = None,
 ) -> bool:
    """Entscheidet, ob ein Slot-Vorschlag in die Liste kommt."""
    base_id = diff.get("baseline_exercise_id")
    prop_id = diff.get("proposed_exercise_id")
    base_off = _off_topic_slot_indices(baseline_qa)
    proj_off = _off_topic_slot_indices(projected_qa)
    delta = _quality_delta(baseline_score, projected_score)
    if prop_id is not None and base_id is not None and int(base_id) == int(prop_id):
        return False
    if slot_problem and prop_id is not None:
        if major_idx in base_off and major_idx not in proj_off:
            return True
        if delta is not None and delta >= -0.001:
            return True
        if stage_specs is not None:
            proj_problems = _problematic_slots_from_path_qa(
                projected_qa,
                projected_steps or baseline_steps or [],
                stage_specs,
            )
            if major_idx not in proj_problems:
                return True
        return True
    if off_topic and base_id is not None:
        if major_idx in base_off and major_idx not in proj_off:
            return True
-        if major_idx in base_off and prop_id is not None:
+        if prop_id is not None:
-            return _slot_diff_improves_path(diff, _quality_delta(baseline_score, projected_score), off_topic=True)
+            return _slot_diff_improves_path(diff, delta, off_topic=True)
    if base_id is None and prop_id is not None:
-        return _slot_diff_improves_path(diff, _quality_delta(baseline_score, projected_score), off_topic=False)
+        return _slot_diff_improves_path(diff, delta, off_topic=False)
    if base_id is not None and prop_id is not None:
-        if int(base_id) == int(prop_id):
+        return _slot_diff_improves_path(diff, delta, off_topic=False)
            return False
        return _slot_diff_improves_path(diff, _quality_delta(baseline_score, projected_score), off_topic=False)
    if base_id is None and prop_id is None and diff.get("proposed_is_ai_proposal"):
        return _slot_diff_improves_path(
            diff,
-            _quality_delta(baseline_score, projected_score),
+            delta,
-            off_topic=off_topic or major_idx in base_off,
+            off_topic=off_topic or major_idx in base_off or slot_problem,
        )
    return False
@ -2900,6 +3022,11 @@ def _run_unified_slot_improvement_review(
    baseline_score = _path_qa_quality_score(baseline_qa)
    gap_fill_offers = list(qa_pack.get("gap_fill_offers") or [])
    off_topic_map = _off_topic_reasons_by_slot(baseline_qa.get("off_topic_steps") or [])
    problem_slots = _problematic_slots_from_path_qa(
        baseline_qa,
        baseline_steps,
        roadmap_ctx.stage_specs,
    )
    steps_by_major = _steps_by_major_index(baseline_steps)
    spec_by_major = {int(s.major_step_index): s for s in roadmap_ctx.stage_specs}
@ -2913,10 +3040,11 @@ def _run_unified_slot_improvement_review(
        current = dict(steps_by_major.get(major_idx, {}))
        current.setdefault("roadmap_major_step_index", major_idx)
        current_id = current.get("exercise_id")
-        off_topic = major_idx in off_topic_map or bool(
+        slot_problem = major_idx in problem_slots
        off_topic = slot_problem or major_idx in off_topic_map or bool(
            current.get("slot_status") in {"off_topic", "stripped"}
        )
-        off_reasons = off_topic_map.get(major_idx, [])
+        off_reasons = list(problem_slots.get(major_idx, [])) + off_topic_map.get(major_idx, [])
        planned_ids = [
            int(s["exercise_id"])
@ -2937,7 +3065,12 @@ def _run_unified_slot_improvement_review(
                anchor_variant_id = int(vid) if vid is not None else None
        exclude_id: Optional[int] = None
-        if current_id is not None and not off_topic:
+        if current_id is not None and not (off_topic or slot_problem):
            try:
                exclude_id = int(current_id)
            except (TypeError, ValueError):
                exclude_id = None
        elif current_id is not None and (off_topic or slot_problem):
            try:
                exclude_id = int(current_id)
            except (TypeError, ValueError):
@ -2972,7 +3105,7 @@ def _run_unified_slot_improvement_review(
                continue
            if (
                current_id is not None
-                and not off_topic
+                and not (off_topic or slot_problem)
                and int(current_id) == cand_id
            ):
                continue
@ -3006,10 +3139,14 @@ def _run_unified_slot_improvement_review(
                diff=diff_stub,
                off_topic=off_topic,
                major_idx=major_idx,
                slot_problem=slot_problem,
                stage_specs=roadmap_ctx.stage_specs,
                baseline_steps=baseline_steps,
                projected_steps=merged_steps,
            )
            suggestion_type = (
                "remove_and_replace"
-                if off_topic and current_id is not None
+                if (off_topic or slot_problem) and current_id is not None
                else ("library_fill" if current_id is None else "library_improvement")
            )
            entry = {
@ -3023,6 +3160,8 @@ def _run_unified_slot_improvement_review(
                "projected_path_qa": projected_qa,
                "improves_path": improves,
                "off_topic": off_topic,
                "slot_problem": slot_problem,
                "problem_reasons": off_reasons[:6],
                "proposed_is_ai_proposal": False,
                "pro_contra": _build_slot_pro_contra(
                    current_step=current,
@ -3048,6 +3187,7 @@ def _run_unified_slot_improvement_review(
        needs_ai = (
            current_id is None
            or off_topic
            or slot_problem
            or bool(current.get("is_ai_proposal"))
        )
        if not needs_ai or not body.include_ai_gap_fill:
@ -3115,8 +3255,12 @@ def _run_unified_slot_improvement_review(
            baseline_score=baseline_score,
            projected_score=projected_score,
            diff=diff_stub,
-            off_topic=off_topic or major_idx in _off_topic_slot_indices(baseline_qa),
+            off_topic=off_topic,
            major_idx=major_idx,
            slot_problem=slot_problem,
            stage_specs=roadmap_ctx.stage_specs,
            baseline_steps=baseline_steps,
            projected_steps=merged_steps,
        )
        entry = {
            **diff_stub,
@ -3127,8 +3271,10 @@ def _run_unified_slot_improvement_review(
            "projected_quality_score": projected_score,
            "baseline_quality_score": baseline_score,
            "projected_path_qa": projected_qa,
-            "improves_path": improves,
+            "improves_path": improves or slot_problem,
            "off_topic": off_topic,
            "slot_problem": slot_problem,
            "problem_reasons": off_reasons[:6],
            "proposed_is_ai_proposal": True,
            "gap_offer": slot_offer,
            "pro_contra": _build_slot_pro_contra(
@ -3143,12 +3289,16 @@ def _run_unified_slot_improvement_review(
                gap_offer=slot_offer,
            ),
        }
-        if improves:
+        if improves or slot_problem:
            entry["improves_path"] = True
            suggestions.append(entry)
        else:
            rejected.append(entry)
    improvement_diffs = [_suggestion_as_slot_diff(s) for s in suggestions]
    problem_slot_payload = {
        str(k): v for k, v in sorted(problem_slots.items(), key=lambda x: x[0])
    }
    slot_diff_scoring = {
        "baseline_quality_score": baseline_score,
        "scored_diffs": improvement_diffs + [_suggestion_as_slot_diff(r) for r in rejected],
@ -3182,9 +3332,11 @@ def _run_unified_slot_improvement_review(
            "unified_slot_review": True,
            "suggestion_count": len(suggestions),
            "rejected_count": len(rejected),
            "problem_slot_count": len(problem_slots),
        },
        "retrieval_phase": "unified_slot_review",
        "unified_slot_review": True,
        "problem_slots": problem_slot_payload,
        "slot_suggestions": suggestions,
        "slot_diff_scoring": slot_diff_scoring,
        "comparison_mode": True,
--- a/backend/tests/test_planning_problematic_slots.py
+++ b/backend/tests/test_planning_problematic_slots.py
@ -0,0 +1,53 @@
 """Schachstellen-Erkennung für unified Slot-Review."""
 from planning_exercise_path_builder import (
    _problematic_slots_from_path_qa,
    _slot_suggestion_accepted,
 )
 from planning_progression_roadmap import StageSpecArtifact
 def _spec(midx: int) -> StageSpecArtifact:
     """Build a minimal stage spec for the slot with major step index ``midx``.

     Only the index and a learning goal naming the 1-based slot number are
     filled in; all other fields are passed as empty values.
     """
     fields = {
         "major_step_index": midx,
         "learning_goal": f"Lernziel Slot {midx + 1}",
         "load_profile": [],
         "exercise_type": "",
         "success_criteria": [],
         "anti_patterns": [],
     }
     return StageSpecArtifact(**fields)
 def test_problematic_slots_from_optimization_hints():
     """A ``rematch_slot`` hint addressed by ``step_index`` marks that slot."""
     rematch_hint = {
         "action": "rematch_slot",
         "step_index": 1,
         "issue": "stage_mismatch",
         "reason": "Übung passt nicht zur Stufe",
     }
     path_qa = {"optimization_hints": [rematch_hint], "off_topic_steps": []}
     baseline_steps = [
         {"roadmap_major_step_index": 0, "exercise_id": 1, "title": "A"},
         {"roadmap_major_step_index": 1, "exercise_id": 2, "title": "B"},
     ]
     stage_specs = [_spec(0), _spec(1)]

     found = _problematic_slots_from_path_qa(path_qa, baseline_steps, stage_specs)

     # step_index 1 resolves to the spec at position 1, i.e. major index 1.
     assert 1 in found
     assert any(
         any(word in reason for word in ("Stufe", "passt"))
         for reason in found[1]
     )
 def test_slot_suggestion_accepted_for_problem_slot():
     """A replacement for a flagged slot is accepted at an unchanged score."""
     baseline_qa = {
         "optimization_hints": [
             {"action": "rematch_slot", "roadmap_major_step_index": 1}
         ]
     }
     projected_qa = {"optimization_hints": []}
     slot_diff = {"baseline_exercise_id": 10, "proposed_exercise_id": 99}

     accepted = _slot_suggestion_accepted(
         baseline_qa=baseline_qa,
         projected_qa=projected_qa,
         baseline_score=0.7,
         projected_score=0.7,
         diff=slot_diff,
         off_topic=False,
         major_idx=1,
         slot_problem=True,
     )

     assert accepted
--- a/frontend/src/components/ProgressionGraphEditor.jsx
+++ b/frontend/src/components/ProgressionGraphEditor.jsx
@ -494,20 +494,31 @@ export default function ProgressionGraphEditor({ graphId, embedded = false, onSa
  }
  const runMatchCompareFlow = async (synced, { source = 'match' } = {}) => {
-    setMatchNotice('Pfad bewerten und je Slot passende Verbesserungen prüfen…')
+    setMatchNotice('Schritt 1/2: Pfad bewerten (wie „Graph bewerten“)…')
    const baselineRes = await fetchPathEvaluate(synced)
    const { draft: evaluated, remainingOffers } = applyEvaluateResult(synced, baselineRes)
    setDraft(evaluated)
    const mergedAfterEval = mergeGapOffersForDraft(evaluated, baselineRes)
    setGapFillOffers(mergedAfterEval.length > 0 ? mergedAfterEval : remainingOffers)
    setMatchNotice('Schritt 2/2: Verbesserungsvorschläge für gemeldete Schachstellen…')
    const reviewRes = await api.suggestProgressionPath({
-      ...buildMatchRequestBase(synced),
+      ...buildEvaluateRequest(synced),
      evaluate_only: false,
      unified_slot_review: true,
      baseline_evaluate_steps: slotsToEvaluateSteps(synced),
      include_llm_intent: false,
      auto_rematch_after_qa: false,
    })
    const qa = reviewRes?.path_qa || null
    setPathQa(qa)
    setDraft((prev) => (prev ? { ...prev, lastFindings: qa } : prev))
-    const compareRes = buildProgressionComparePayload(null, reviewRes)
+    if (!reviewRes?.unified_slot_review) {
-    setGapFillOffers(mergeGapOffersForDraft(synced, reviewRes, reviewRes))
+      throw new Error(
        'Match-Review nicht verfügbar — Backend-Stand prüfen (unified_slot_review fehlt in der Antwort).',
      )
    }
    const compareRes = buildProgressionComparePayload(baselineRes, reviewRes)
    setGapFillOffers(mergeGapOffersForDraft(evaluated, baselineRes, reviewRes))
    presentMatchCompare(compareRes, { source })
    return compareRes
  }
@ -523,11 +534,15 @@ export default function ProgressionGraphEditor({ graphId, embedded = false, onSa
    const baselineQa = res?.baseline_path_qa || null
    const diffCount = res?.slot_diff_count ?? compareDiffsForDialog(res).length
    const rejectedCount = res?.slot_diff_count_rejected ?? rejectedCompareDiffs(res).length
    const problemCount = res?.match_summary?.problem_slot_count
      ?? (res?.problem_slots ? Object.keys(res.problem_slots).length : 0)
    const bPct = pathQaQualityPercent(baselineQa)
    let notice =
      diffCount > 0
-        ? `Match: ${diffCount} Verbesserung(en) — je Slot gegen deinen Pfad (${bPct != null ? `${bPct} %` : 'QS'}) geprüft.`
+        ? `Match: ${diffCount} Verbesserung(en) für gemeldete Schachstellen.`
-        : 'Match: Keine messbare Verbesserung gegenüber deinem Pfad.'
+        : problemCount > 0
          ? `Match: ${problemCount} Schachstelle(n) erkannt, aber kein Bibliotheks-Ersatz mit Gewinn — KI-Angebote im Panel prüfen.`
          : 'Match: Keine Schachstellen — Pfad wirkt konsistent.'
    if (rejectedCount > 0) {
      notice += ` ${rejectedCount} Vorschlag/Vorschläge verworfen (Verschlechterung oder neutral).`
    }
--- a/frontend/src/utils/progressionGraphDraft.js
+++ b/frontend/src/utils/progressionGraphDraft.js
@ -1091,7 +1091,7 @@ function mergeGapFillOffersFromSteps(steps, offers) {
 */
 export function buildProgressionComparePayload(baselineRes, proposedRes) {
  if (proposedRes?.unified_slot_review) {
-    return buildUnifiedSlotReviewComparePayload(proposedRes)
+    return buildUnifiedSlotReviewComparePayload(proposedRes, baselineRes)
  }
  const baselineSteps = Array.isArray(baselineRes?.steps) ? baselineRes.steps : []
@ -1154,9 +1154,11 @@ export function buildProgressionComparePayload(baselineRes, proposedRes) {
 }
 /** Einheitlicher Match-Review (Bewertung + Slot-Vorschläge in einem Lauf). */
-export function buildUnifiedSlotReviewComparePayload(res) {
+export function buildUnifiedSlotReviewComparePayload(res, baselineRes = null) {
-  const baselineSteps = Array.isArray(res?.baseline_steps) ? res.baseline_steps : (res?.steps || [])
+  const baselineSteps = Array.isArray(baselineRes?.steps)
-  const baselineQa = res?.baseline_path_qa || res?.path_qa || null
+    ? baselineRes.steps
    : (Array.isArray(res?.baseline_steps) ? res.baseline_steps : (res?.steps || []))
  const baselineQa = baselineRes?.path_qa || res?.baseline_path_qa || res?.path_qa || null
  const scoring = res?.slot_diff_scoring
  const suggestions = Array.isArray(res?.slot_suggestions) ? res.slot_suggestions : []
  const improving = suggestions.filter((s) => s?.improves_path)