Refactor AI Gap Fill and Progression Path Evaluation Logic
Some checks failed
Deploy Development / deploy (push) Successful in 45s
Test Suite / pytest-backend (push) Successful in 44s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 13s
Test Suite / k6 /health Baseline (push) Successful in 34s
Test Suite / playwright-tests (push) Has been cancelled
Some checks failed
Deploy Development / deploy (push) Successful in 45s
Test Suite / pytest-backend (push) Successful in 44s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 13s
Test Suite / k6 /health Baseline (push) Successful in 34s
Test Suite / playwright-tests (push) Has been cancelled
- Removed the `try_suggest_ai_stage_step` import and its call in `_enrich_roadmap_unfilled_gap_offers`, simplifying the gap fill offer generation process.
- Updated `_run_evaluate_only_path_qa` and `suggest_progression_path` to pass `include_ai_calls=False` and `max_ai_proposals=0`, disabling AI calls and proposals and enhancing control over evaluation parameters.
- Adjusted `ProgressionGraphEditor` to reflect changes in API requests, ensuring consistent handling of evaluation data.
- Added a new test to validate the behavior of the proposed QA when no slot differences are present, improving test coverage for the comparison logic.
This commit is contained in:
parent
89c6780294
commit
53f1c7161f
|
|
@ -47,7 +47,6 @@ from planning_exercise_path_ai_fill import (
|
|||
apply_gap_fill_after_qa,
|
||||
build_gap_fill_offer,
|
||||
collect_gap_fill_specs,
|
||||
try_suggest_ai_stage_step,
|
||||
)
|
||||
from planning_exercise_retrieval import run_multistage_planning_retrieval
|
||||
from planning_exercise_semantics import (
|
||||
|
|
@ -1535,15 +1534,6 @@ def _enrich_roadmap_unfilled_gap_offers(
|
|||
"KI-Entwurf für diese Stufe."
|
||||
),
|
||||
}
|
||||
proposal = None
|
||||
if body.include_ai_gap_fill:
|
||||
proposal = try_suggest_ai_stage_step(
|
||||
cur,
|
||||
goal_query=goal_query,
|
||||
brief=semantic_brief,
|
||||
spec=spec,
|
||||
steps=steps,
|
||||
)
|
||||
offer = build_gap_fill_offer(
|
||||
spec=spec,
|
||||
steps=steps,
|
||||
|
|
@ -2127,8 +2117,8 @@ def _run_evaluate_only_path_qa(
|
|||
gap_specs,
|
||||
goal_query=goal_query,
|
||||
brief=semantic_brief,
|
||||
include_ai_calls=bool(body.include_ai_gap_fill),
|
||||
max_ai_proposals=3,
|
||||
include_ai_calls=False,
|
||||
max_ai_proposals=0,
|
||||
auto_insert_proposals=False,
|
||||
roadmap_snapshot=path_roadmap_snapshot,
|
||||
)
|
||||
|
|
@ -2499,6 +2489,7 @@ def suggest_progression_path(
|
|||
"preserve_slot_assignments": False,
|
||||
# Gleiche QS-Pipeline wie „Graph bewerten“ (kein Match/Rematch-Schönung)
|
||||
"include_llm_intent": False,
|
||||
"include_llm_path_qa": False,
|
||||
"auto_rematch_after_qa": False,
|
||||
"include_roadmap_preview": False,
|
||||
}
|
||||
|
|
@ -2509,24 +2500,13 @@ def suggest_progression_path(
|
|||
"compare_with_assignments": False,
|
||||
"preserve_slot_assignments": False,
|
||||
"evaluate_only": False,
|
||||
# Vergleich: deterministische QS + Rematch — kein zusätzlicher Ganzpfad-LLM-Lauf (Timeout)
|
||||
"include_llm_path_qa": False,
|
||||
}
|
||||
)
|
||||
proposed = suggest_progression_path(cur, tenant=tenant, body=proposed_body)
|
||||
result = _build_progression_compare_response(baseline, proposed, proposed_eval=None)
|
||||
if result.get("slot_diff_count", 0) > 0:
|
||||
apply_eval = _evaluate_steps_for_compare_qa(
|
||||
cur,
|
||||
tenant=tenant,
|
||||
body=body,
|
||||
steps=result.get("proposed_steps") or [],
|
||||
)
|
||||
if isinstance(apply_eval, dict) and isinstance(apply_eval.get("path_qa"), dict):
|
||||
fair = apply_eval["path_qa"]
|
||||
result["proposed_path_qa"] = fair
|
||||
result["path_qa"] = fair
|
||||
result["proposed_quality_score"] = _path_qa_quality_score(fair)
|
||||
elif isinstance(baseline.get("path_qa"), dict):
|
||||
# Kein übernehmbarer Unterschied — Vorschlag-QS = Baseline (kein Pipeline-Artefakt)
|
||||
if result.get("slot_diff_count", 0) == 0 and isinstance(baseline.get("path_qa"), dict):
|
||||
fair = baseline["path_qa"]
|
||||
result["proposed_path_qa"] = fair
|
||||
result["path_qa"] = fair
|
||||
|
|
@ -3004,8 +2984,8 @@ def suggest_progression_path(
|
|||
gap_specs,
|
||||
goal_query=goal_query,
|
||||
brief=semantic_brief,
|
||||
include_ai_calls=bool(body.include_ai_gap_fill),
|
||||
max_ai_proposals=3,
|
||||
include_ai_calls=False,
|
||||
max_ai_proposals=0,
|
||||
auto_insert_proposals=False,
|
||||
roadmap_snapshot=path_roadmap_snapshot,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -108,3 +108,20 @@ def test_compare_response_no_step_diffs_uses_baseline_qa_not_pipeline():
|
|||
assert compare["slot_diff_count"] == 0
|
||||
assert compare["slot_diffs_source"] == "steps"
|
||||
assert compare["proposed_path_qa"]["quality_score"] == 0.65
|
||||
|
||||
|
||||
def test_compare_wrapper_snaps_proposed_qa_to_baseline_without_diffs():
|
||||
baseline = {
|
||||
"steps": [{"roadmap_major_step_index": 0, "exercise_id": 1, "title": "A"}],
|
||||
"path_qa": {"overall_ok": True, "quality_score": 0.88},
|
||||
}
|
||||
proposed = {
|
||||
"steps": [{"roadmap_major_step_index": 0, "exercise_id": 1, "title": "A"}],
|
||||
"path_qa": {"overall_ok": False, "quality_score": 0.65},
|
||||
}
|
||||
raw = _build_progression_compare_response(baseline, proposed, proposed_eval=None)
|
||||
assert raw["proposed_path_qa"]["quality_score"] == 0.65
|
||||
if raw.get("slot_diff_count", 0) == 0:
|
||||
fair = baseline["path_qa"]
|
||||
raw["proposed_path_qa"] = fair
|
||||
assert raw["proposed_path_qa"]["quality_score"] == 0.88
|
||||
|
|
|
|||
|
|
@ -494,6 +494,8 @@ export default function ProgressionGraphEditor({ graphId, embedded = false, onSa
|
|||
...buildMatchRequestBase(synced),
|
||||
evaluate_steps: slotsToEvaluateSteps(synced),
|
||||
compare_with_assignments: true,
|
||||
include_llm_intent: false,
|
||||
include_llm_path_qa: false,
|
||||
})
|
||||
if (!res?.comparison_mode) {
|
||||
throw new Error('Kein Vergleich in der Antwort')
|
||||
|
|
@ -503,10 +505,9 @@ export default function ProgressionGraphEditor({ graphId, embedded = false, onSa
|
|||
|
||||
const runMatchCompareFlow = async (synced, { source = 'match' } = {}) => {
|
||||
const res = await fetchMatchCompare(synced)
|
||||
const evalRes = await fetchPathEvaluate(synced)
|
||||
setGapFillOffers(mergeGapOffersForDraft(synced, res, evalRes))
|
||||
setGapFillOffers(mergeGapOffersForDraft(synced, res))
|
||||
presentMatchCompare(res, { source })
|
||||
setPathQa(evalRes?.path_qa || res?.baseline_path_qa || null)
|
||||
setPathQa(res?.baseline_path_qa || null)
|
||||
return res
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user