progression V2 #57
|
|
@ -36,6 +36,7 @@ from planning_stage_context import build_contextualized_stage_goal, resolve_path
|
|||
from planning_exercise_path_qa import (
|
||||
apply_llm_path_reorder,
|
||||
build_path_qa_summary,
|
||||
compute_deterministic_path_quality_score,
|
||||
detect_off_topic_steps,
|
||||
detect_path_gaps,
|
||||
insert_bridge_exercises,
|
||||
|
|
@ -2399,6 +2400,110 @@ def _evaluate_steps_for_compare_qa(
|
|||
return suggest_progression_path(cur, tenant=tenant, body=eval_body)
|
||||
|
||||
|
||||
def _quick_evaluate_steps_qa(
    cur,
    *,
    goal_query: str,
    semantic_brief: PlanningSemanticBrief,
    steps: Sequence[Mapping[str, Any]],
    roadmap_ctx: Optional[ProgressionRoadmapContext],
) -> Dict[str, Any]:
    """Fast path QA without a recursive API run, intended for slot comparisons.

    Runs gap detection and off-topic detection on ``steps``, feeds both into
    the multistage QA and the path QA summary with every LLM-related input
    disabled (``llm_qa=None``, ``llm_applied=False``) and with empty
    bridge/proposal/offer lists, and returns the resulting summary dict.

    Args:
        cur: Handle forwarded unchanged to the detection helpers
            (presumably a database cursor; confirm against callers).
        goal_query: Goal text forwarded to the off-topic detection.
        semantic_brief: Brief forwarded to both detection helpers.
        steps: Path steps to evaluate; ``None`` or empty is treated as ``[]``.
        roadmap_ctx: When not ``None`` the evaluation runs in roadmap-first
            mode and the summary is tagged ``"roadmap_first_lite"``.

    Returns:
        The summary built by ``build_path_qa_summary``, with
        ``"quality_score"`` filled in deterministically if it was missing.
    """
    candidate_steps = list(steps or [])
    is_roadmap_first = roadmap_ctx is not None

    path_gaps = detect_path_gaps(
        cur,
        candidate_steps,
        brief=semantic_brief,
        roadmap_first=is_roadmap_first,
    )
    off_topic_hits = detect_off_topic_steps(
        cur,
        candidate_steps,
        brief=semantic_brief,
        goal_query=goal_query,
    )

    staged_qa = run_multistage_path_qa(
        off_topic_steps=off_topic_hits,
        stripped_off_topic=[],
        gaps=path_gaps,
        llm_qa=None,
        llm_applied=False,
    )

    if is_roadmap_first:
        qa_mode = "roadmap_first_lite"
    else:
        qa_mode = None

    summary = build_path_qa_summary(
        gaps=path_gaps,
        bridge_inserts=[],
        ai_proposals=[],
        gap_fill_offers=[],
        off_topic_steps=off_topic_hits,
        stripped_off_topic=[],
        llm_qa=None,
        llm_applied=False,
        roadmap_qa_mode=qa_mode,
        multistage_qa=staged_qa,
    )

    # NOTE(review): this fallback only fires when the summary carries no
    # score. If build_path_qa_summary always sets "quality_score", the
    # steps-aware computation below never runs — confirm that the summary's
    # own score accounts for empty slots.
    if summary.get("quality_score") is None:
        summary["quality_score"] = compute_deterministic_path_quality_score(
            gaps=path_gaps,
            off_topic_steps=off_topic_hits,
            steps=candidate_steps,
            multistage_qa=staged_qa,
        )
    return summary
|
||||
|
||||
|
||||
def _off_topic_slot_indices(path_qa: Optional[Mapping[str, Any]]) -> Set[int]:
    """Return the set of keys that ``_off_topic_reasons_by_slot`` yields.

    The helper is fed the ``"off_topic_steps"`` entry of ``path_qa``; a
    missing/``None`` QA mapping or a missing/empty entry is treated as an
    empty list. The keys are presumably slot (major step) indices — verify
    against ``_off_topic_reasons_by_slot``.
    """
    qa = path_qa or {}
    flagged_steps = qa.get("off_topic_steps") or []
    reasons_by_slot = _off_topic_reasons_by_slot(flagged_steps)
    return set(reasons_by_slot.keys())
|
||||
|
||||
|
||||
def _slot_suggestion_accepted(
    *,
    baseline_qa: Optional[Mapping[str, Any]],
    projected_qa: Optional[Mapping[str, Any]],
    baseline_score: Optional[float],
    projected_score: Optional[float],
    diff: Mapping[str, Any],
    off_topic: bool,
    major_idx: int,
) -> bool:
    """Decide whether a slot suggestion makes it into the suggestion list.

    Decision order:

    1. Slot is flagged off-topic by the caller, has a baseline exercise, and
       is off-topic in the baseline QA: accept outright when the projected QA
       no longer flags it; otherwise, if a replacement exercise is proposed,
       defer to ``_slot_diff_improves_path`` with ``off_topic=True``.
    2. A replacement exercise is proposed: reject when it equals the baseline
       exercise, else defer to ``_slot_diff_improves_path`` with
       ``off_topic=False``.
    3. Neither baseline nor proposed exercise, but the diff is marked
       ``"proposed_is_ai_proposal"``: defer to ``_slot_diff_improves_path``,
       treating the slot as off-topic if the caller says so or the baseline
       QA flags it.
    4. Anything else is rejected.
    """
    baseline_id = diff.get("baseline_exercise_id")
    proposed_id = diff.get("proposed_exercise_id")
    flagged_before = _off_topic_slot_indices(baseline_qa)
    flagged_after = _off_topic_slot_indices(projected_qa)

    def _improves(treat_as_off_topic):
        # Evaluated lazily so the score delta is only computed on the
        # branches that actually consult the improvement heuristic.
        return _slot_diff_improves_path(
            diff,
            _quality_delta(baseline_score, projected_score),
            off_topic=treat_as_off_topic,
        )

    has_baseline = baseline_id is not None
    has_proposal = proposed_id is not None

    if off_topic and has_baseline and major_idx in flagged_before:
        if major_idx not in flagged_after:
            return True
        if has_proposal:
            return _improves(True)

    if has_proposal:
        if has_baseline and int(baseline_id) == int(proposed_id):
            return False
        return _improves(False)

    if not has_baseline and diff.get("proposed_is_ai_proposal"):
        return _improves(off_topic or major_idx in flagged_before)

    return False
|
||||
|
||||
|
||||
def _quality_delta(
|
||||
baseline_score: Optional[float],
|
||||
projected_score: Optional[float],
|
||||
) -> Optional[float]:
|
||||
if baseline_score is None or projected_score is None:
|
||||
return None
|
||||
return round(float(projected_score) - float(baseline_score), 4)
|
||||
|
||||
|
||||
def _apply_slot_diff_to_steps(
|
||||
baseline_steps: Sequence[Mapping[str, Any]],
|
||||
diff: Mapping[str, Any],
|
||||
|
|
@ -2784,6 +2889,14 @@ def _run_unified_slot_improvement_review(
|
|||
)
|
||||
baseline_steps = list(qa_pack.get("steps") or baseline_steps)
|
||||
baseline_qa = qa_pack.get("path_qa") if isinstance(qa_pack.get("path_qa"), dict) else {}
|
||||
if baseline_qa.get("quality_score") is None:
|
||||
baseline_qa = dict(baseline_qa)
|
||||
baseline_qa["quality_score"] = compute_deterministic_path_quality_score(
|
||||
gaps=baseline_qa.get("large_gaps") or [],
|
||||
off_topic_steps=baseline_qa.get("off_topic_steps") or [],
|
||||
steps=baseline_steps,
|
||||
multistage_qa=baseline_qa,
|
||||
)
|
||||
baseline_score = _path_qa_quality_score(baseline_qa)
|
||||
gap_fill_offers = list(qa_pack.get("gap_fill_offers") or [])
|
||||
off_topic_map = _off_topic_reasons_by_slot(baseline_qa.get("off_topic_steps") or [])
|
||||
|
|
@ -2794,14 +2907,6 @@ def _run_unified_slot_improvement_review(
|
|||
|
||||
suggestions: List[Dict[str, Any]] = []
|
||||
rejected: List[Dict[str, Any]] = []
|
||||
scored_eval_body = body.model_copy(
|
||||
update={
|
||||
"include_llm_path_qa": False,
|
||||
"include_ai_gap_fill": False,
|
||||
"auto_rematch_after_qa": False,
|
||||
"include_roadmap_preview": False,
|
||||
}
|
||||
)
|
||||
|
||||
for step_index, stage_spec in enumerate(roadmap_ctx.stage_specs):
|
||||
major_idx = int(stage_spec.major_step_index)
|
||||
|
|
@ -2856,6 +2961,7 @@ def _run_unified_slot_improvement_review(
|
|||
anchor_variant_id=anchor_variant_id,
|
||||
used=used_other,
|
||||
exclude_exercise_id=exclude_id if not off_topic else int(current_id) if current_id else None,
|
||||
max_candidates=3,
|
||||
)
|
||||
|
||||
accepted_for_slot = False
|
||||
|
|
@ -2882,22 +2988,25 @@ def _run_unified_slot_improvement_review(
|
|||
if int(raw.get("roadmap_major_step_index", -1)) == major_idx:
|
||||
merged_steps[i] = {**raw, **candidate, "roadmap_major_step_index": major_idx}
|
||||
break
|
||||
eval_res = _evaluate_steps_for_compare_qa(
|
||||
eval_res = _quick_evaluate_steps_qa(
|
||||
cur,
|
||||
tenant=tenant,
|
||||
body=scored_eval_body,
|
||||
goal_query=goal_query,
|
||||
semantic_brief=semantic_brief,
|
||||
steps=merged_steps,
|
||||
roadmap_ctx=roadmap_ctx,
|
||||
)
|
||||
projected_qa = (
|
||||
eval_res.get("path_qa")
|
||||
if isinstance(eval_res, dict) and isinstance(eval_res.get("path_qa"), dict)
|
||||
else None
|
||||
)
|
||||
projected_qa = eval_res if isinstance(eval_res, dict) else None
|
||||
projected_score = _path_qa_quality_score(projected_qa)
|
||||
delta: Optional[float] = None
|
||||
if baseline_score is not None and projected_score is not None:
|
||||
delta = round(projected_score - baseline_score, 4)
|
||||
improves = _slot_diff_improves_path(diff_stub, delta, off_topic=off_topic)
|
||||
delta = _quality_delta(baseline_score, projected_score)
|
||||
improves = _slot_suggestion_accepted(
|
||||
baseline_qa=baseline_qa,
|
||||
projected_qa=projected_qa,
|
||||
baseline_score=baseline_score,
|
||||
projected_score=projected_score,
|
||||
diff=diff_stub,
|
||||
off_topic=off_topic,
|
||||
major_idx=major_idx,
|
||||
)
|
||||
suggestion_type = (
|
||||
"remove_and_replace"
|
||||
if off_topic and current_id is not None
|
||||
|
|
@ -2990,24 +3099,25 @@ def _run_unified_slot_improvement_review(
|
|||
"proposed_title": ai_step.get("title"),
|
||||
}
|
||||
merged_steps = _apply_slot_diff_to_steps(baseline_steps, diff_stub, [ai_step])
|
||||
eval_res = _evaluate_steps_for_compare_qa(
|
||||
eval_res = _quick_evaluate_steps_qa(
|
||||
cur,
|
||||
tenant=tenant,
|
||||
body=scored_eval_body,
|
||||
goal_query=goal_query,
|
||||
semantic_brief=semantic_brief,
|
||||
steps=merged_steps,
|
||||
roadmap_ctx=roadmap_ctx,
|
||||
)
|
||||
projected_qa = (
|
||||
eval_res.get("path_qa")
|
||||
if isinstance(eval_res, dict) and isinstance(eval_res.get("path_qa"), dict)
|
||||
else None
|
||||
)
|
||||
projected_qa = eval_res if isinstance(eval_res, dict) else None
|
||||
projected_score = _path_qa_quality_score(projected_qa)
|
||||
delta = (
|
||||
round(projected_score - baseline_score, 4)
|
||||
if baseline_score is not None and projected_score is not None
|
||||
else None
|
||||
delta = _quality_delta(baseline_score, projected_score)
|
||||
improves = _slot_suggestion_accepted(
|
||||
baseline_qa=baseline_qa,
|
||||
projected_qa=projected_qa,
|
||||
baseline_score=baseline_score,
|
||||
projected_score=projected_score,
|
||||
diff=diff_stub,
|
||||
off_topic=off_topic or major_idx in _off_topic_slot_indices(baseline_qa),
|
||||
major_idx=major_idx,
|
||||
)
|
||||
improves = _slot_diff_improves_path(diff_stub, delta, off_topic=off_topic or current_id is None)
|
||||
entry = {
|
||||
**diff_stub,
|
||||
"baseline_slot_status": current.get("slot_status"),
|
||||
|
|
|
|||
|
|
@ -745,12 +745,44 @@ def build_path_qa_summary(
|
|||
f"Schritt „{o.get('title')}“ passt nicht zum Pfad-Thema"
|
||||
for o in off_topic
|
||||
]
|
||||
summary["quality_score"] = compute_deterministic_path_quality_score(
|
||||
gaps=gaps,
|
||||
off_topic_steps=off_topic,
|
||||
steps=steps,
|
||||
multistage_qa=multistage_qa,
|
||||
)
|
||||
return summary
|
||||
|
||||
|
||||
def compute_deterministic_path_quality_score(
    *,
    gaps: Sequence[Mapping[str, Any]],
    off_topic_steps: Sequence[Mapping[str, Any]],
    steps: Optional[Sequence[Mapping[str, Any]]] = None,
    multistage_qa: Optional[Mapping[str, Any]] = None,
) -> float:
    """Heuristic path quality score without an LLM — basis for slot comparisons.

    Starts at 0.92 and subtracts fixed penalties:

    * 0.08 per off-topic step,
    * 0.05 per gap,
    * 0.06 per empty slot (a step mapping whose ``"exercise_id"`` is ``None``
      and that is not marked ``"is_ai_proposal"``),
    * 0.02 per ``multistage_qa["optimization_hint_count"]``, capped at 0.14.

    Args:
        gaps: Detected path gaps; ``None``/empty counts as zero.
        off_topic_steps: Detected off-topic steps; ``None``/empty counts as zero.
        steps: Optional path steps, used only to count empty slots.
        multistage_qa: Optional mapping providing ``"optimization_hint_count"``.

    Returns:
        The score rounded to 4 decimals and clamped to ``[0.35, 0.98]``.
    """
    score = 0.92
    score -= 0.08 * len(off_topic_steps or [])
    score -= 0.05 * len(gaps or [])

    # Accept any Mapping, not only dict: the signature promises Mapping
    # entries, and read-only/proxy mappings must be penalized the same way.
    empty_slots = sum(
        1
        for step in steps or ()
        if isinstance(step, Mapping)
        and step.get("exercise_id") is None
        and not step.get("is_ai_proposal")
    )
    score -= 0.06 * empty_slots

    hint_count = int((multistage_qa or {}).get("optimization_hint_count") or 0)
    # Clamp below at zero so a bogus negative count can never raise the score.
    score -= max(0.0, min(0.14, 0.02 * hint_count))

    return max(0.35, min(0.98, round(score, 4)))
|
||||
|
||||
|
||||
__all__ = [
|
||||
"apply_llm_path_reorder",
|
||||
"build_path_qa_summary",
|
||||
"compute_deterministic_path_quality_score",
|
||||
"detect_off_topic_steps",
|
||||
"detect_path_gaps",
|
||||
"is_roadmap_planned_neighbor_pair",
|
||||
|
|
|
|||
21
backend/tests/test_planning_deterministic_quality_score.py
Normal file
21
backend/tests/test_planning_deterministic_quality_score.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
"""Deterministische Pfad-QS ohne LLM."""
|
||||
from planning_exercise_path_qa import compute_deterministic_path_quality_score
|
||||
|
||||
|
||||
def test_deterministic_quality_score_penalizes_off_topic():
    """A path with one off-topic step must score below a clean path."""
    clean_score = compute_deterministic_path_quality_score(gaps=[], off_topic_steps=[])
    flagged_steps = [{"roadmap_major_step_index": 1}]
    flagged_score = compute_deterministic_path_quality_score(
        gaps=[],
        off_topic_steps=flagged_steps,
    )
    assert flagged_score < clean_score
|
||||
|
||||
|
||||
def test_deterministic_quality_score_penalizes_empty_slots():
    """A path containing an unfilled slot must score below a path with no steps."""
    no_steps_score = compute_deterministic_path_quality_score(
        gaps=[], off_topic_steps=[], steps=[]
    )
    one_unfilled = [{"exercise_id": None}, {"exercise_id": 1}]
    unfilled_score = compute_deterministic_path_quality_score(
        gaps=[],
        off_topic_steps=[],
        steps=one_unfilled,
    )
    assert unfilled_score < no_steps_score
|
||||
|
|
@ -500,10 +500,11 @@ export default function ProgressionGraphEditor({ graphId, embedded = false, onSa
|
|||
unified_slot_review: true,
|
||||
baseline_evaluate_steps: slotsToEvaluateSteps(synced),
|
||||
include_llm_intent: false,
|
||||
include_llm_path_qa: false,
|
||||
auto_rematch_after_qa: false,
|
||||
})
|
||||
setPathQa(reviewRes?.path_qa || null)
|
||||
const qa = reviewRes?.path_qa || null
|
||||
setPathQa(qa)
|
||||
setDraft((prev) => (prev ? { ...prev, lastFindings: qa } : prev))
|
||||
|
||||
const compareRes = buildProgressionComparePayload(null, reviewRes)
|
||||
setGapFillOffers(mergeGapOffersForDraft(synced, reviewRes, reviewRes))
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user