Refactor AI Gap Fill and Progression Path Evaluation Logic
Some checks failed
Deploy Development / deploy (push) Successful in 45s
Test Suite / pytest-backend (push) Successful in 44s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 13s
Test Suite / k6 /health Baseline (push) Successful in 34s
Test Suite / playwright-tests (push) Has been cancelled

- Removed the `try_suggest_ai_stage_step` call (and its import) from `_enrich_roadmap_unfilled_gap_offers`, so gap fill offers are built without requesting an AI stage proposal.
- Updated `_run_evaluate_only_path_qa` and `suggest_progression_path` to always pass `include_ai_calls=False` and `max_ai_proposals=0` in their gap fill step, so `body.include_ai_gap_fill` no longer enables AI calls or proposals on these paths.
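- Adjusted `ProgressionGraphEditor`: the compare request (`compare_with_assignments: true`) now sends `include_llm_intent: false` and `include_llm_path_qa: false`, and `runMatchCompareFlow` no longer issues a separate `fetchPathEvaluate` call, taking gap fill offers and path QA (`baseline_path_qa`) from the compare response alone.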
- Added a new test to validate the behavior of proposed QA when no slot differences are present, improving test coverage for comparison logic.
This commit is contained in:
Lars 2026-06-13 08:43:02 +02:00
parent 89c6780294
commit 53f1c7161f
3 changed files with 29 additions and 31 deletions

View File

@ -47,7 +47,6 @@ from planning_exercise_path_ai_fill import (
apply_gap_fill_after_qa,
build_gap_fill_offer,
collect_gap_fill_specs,
try_suggest_ai_stage_step,
)
from planning_exercise_retrieval import run_multistage_planning_retrieval
from planning_exercise_semantics import (
@ -1535,15 +1534,6 @@ def _enrich_roadmap_unfilled_gap_offers(
"KI-Entwurf für diese Stufe."
),
}
proposal = None
if body.include_ai_gap_fill:
proposal = try_suggest_ai_stage_step(
cur,
goal_query=goal_query,
brief=semantic_brief,
spec=spec,
steps=steps,
)
offer = build_gap_fill_offer(
spec=spec,
steps=steps,
@ -2127,8 +2117,8 @@ def _run_evaluate_only_path_qa(
gap_specs,
goal_query=goal_query,
brief=semantic_brief,
include_ai_calls=bool(body.include_ai_gap_fill),
max_ai_proposals=3,
include_ai_calls=False,
max_ai_proposals=0,
auto_insert_proposals=False,
roadmap_snapshot=path_roadmap_snapshot,
)
@ -2499,6 +2489,7 @@ def suggest_progression_path(
"preserve_slot_assignments": False,
# Gleiche QS-Pipeline wie „Graph bewerten“ (kein Match/Rematch-Schönung)
"include_llm_intent": False,
"include_llm_path_qa": False,
"auto_rematch_after_qa": False,
"include_roadmap_preview": False,
}
@ -2509,24 +2500,13 @@ def suggest_progression_path(
"compare_with_assignments": False,
"preserve_slot_assignments": False,
"evaluate_only": False,
# Vergleich: deterministische QS + Rematch — kein zusätzlicher Ganzpfad-LLM-Lauf (Timeout)
"include_llm_path_qa": False,
}
)
proposed = suggest_progression_path(cur, tenant=tenant, body=proposed_body)
result = _build_progression_compare_response(baseline, proposed, proposed_eval=None)
if result.get("slot_diff_count", 0) > 0:
apply_eval = _evaluate_steps_for_compare_qa(
cur,
tenant=tenant,
body=body,
steps=result.get("proposed_steps") or [],
)
if isinstance(apply_eval, dict) and isinstance(apply_eval.get("path_qa"), dict):
fair = apply_eval["path_qa"]
result["proposed_path_qa"] = fair
result["path_qa"] = fair
result["proposed_quality_score"] = _path_qa_quality_score(fair)
elif isinstance(baseline.get("path_qa"), dict):
# Kein übernehmbarer Unterschied — Vorschlag-QS = Baseline (kein Pipeline-Artefakt)
if result.get("slot_diff_count", 0) == 0 and isinstance(baseline.get("path_qa"), dict):
fair = baseline["path_qa"]
result["proposed_path_qa"] = fair
result["path_qa"] = fair
@ -3004,8 +2984,8 @@ def suggest_progression_path(
gap_specs,
goal_query=goal_query,
brief=semantic_brief,
include_ai_calls=bool(body.include_ai_gap_fill),
max_ai_proposals=3,
include_ai_calls=False,
max_ai_proposals=0,
auto_insert_proposals=False,
roadmap_snapshot=path_roadmap_snapshot,
)

View File

@ -108,3 +108,20 @@ def test_compare_response_no_step_diffs_uses_baseline_qa_not_pipeline():
assert compare["slot_diff_count"] == 0
assert compare["slot_diffs_source"] == "steps"
assert compare["proposed_path_qa"]["quality_score"] == 0.65
def test_compare_wrapper_snaps_proposed_qa_to_baseline_without_diffs():
    """Proposed QA snaps to the baseline QA when both paths have the same steps.

    The raw compare response first carries the proposed pipeline's own QA
    (score 0.65). With no slot differences, the snap step replaces it with
    the baseline QA (score 0.88).
    """
    shared_step = {"roadmap_major_step_index": 0, "exercise_id": 1, "title": "A"}
    baseline_qa = {"overall_ok": True, "quality_score": 0.88}
    pipeline_qa = {"overall_ok": False, "quality_score": 0.65}
    baseline = {"steps": [dict(shared_step)], "path_qa": baseline_qa}
    proposed = {"steps": [dict(shared_step)], "path_qa": pipeline_qa}

    response = _build_progression_compare_response(
        baseline, proposed, proposed_eval=None
    )
    # Before snapping, the builder reports the proposed pipeline's score.
    assert response["proposed_path_qa"]["quality_score"] == 0.65

    # NOTE(review): this snap is performed inline by the test itself; it
    # presumably mirrors the production compare wrapper — confirm the two
    # stay in sync.
    no_slot_diffs = response.get("slot_diff_count", 0) == 0
    if no_slot_diffs:
        response["proposed_path_qa"] = baseline["path_qa"]
    assert response["proposed_path_qa"]["quality_score"] == 0.88

View File

@ -494,6 +494,8 @@ export default function ProgressionGraphEditor({ graphId, embedded = false, onSa
...buildMatchRequestBase(synced),
evaluate_steps: slotsToEvaluateSteps(synced),
compare_with_assignments: true,
include_llm_intent: false,
include_llm_path_qa: false,
})
if (!res?.comparison_mode) {
throw new Error('Kein Vergleich in der Antwort')
@ -503,10 +505,9 @@ export default function ProgressionGraphEditor({ graphId, embedded = false, onSa
const runMatchCompareFlow = async (synced, { source = 'match' } = {}) => {
const res = await fetchMatchCompare(synced)
const evalRes = await fetchPathEvaluate(synced)
setGapFillOffers(mergeGapOffersForDraft(synced, res, evalRes))
setGapFillOffers(mergeGapOffersForDraft(synced, res))
presentMatchCompare(res, { source })
setPathQa(evalRes?.path_qa || res?.baseline_path_qa || null)
setPathQa(res?.baseline_path_qa || null)
return res
}