diff --git a/backend/planning_exercise_path_builder.py b/backend/planning_exercise_path_builder.py index 7114ced..9778606 100644 --- a/backend/planning_exercise_path_builder.py +++ b/backend/planning_exercise_path_builder.py @@ -36,6 +36,7 @@ from planning_stage_context import build_contextualized_stage_goal, resolve_path from planning_exercise_path_qa import ( apply_llm_path_reorder, build_path_qa_summary, + compute_deterministic_path_quality_score, detect_off_topic_steps, detect_path_gaps, insert_bridge_exercises, @@ -2399,6 +2400,110 @@ def _evaluate_steps_for_compare_qa( return suggest_progression_path(cur, tenant=tenant, body=eval_body) +def _quick_evaluate_steps_qa( + cur, + *, + goal_query: str, + semantic_brief: PlanningSemanticBrief, + steps: Sequence[Mapping[str, Any]], + roadmap_ctx: Optional[ProgressionRoadmapContext], +) -> Dict[str, Any]: + """Schnelle Pfad-QS ohne rekursiven API-Lauf — für Slot-Vergleiche.""" + roadmap_first = roadmap_ctx is not None + steps_list = list(steps or []) + gaps = detect_path_gaps( + cur, + steps_list, + brief=semantic_brief, + roadmap_first=roadmap_first, + ) + off_topic_steps = detect_off_topic_steps( + cur, + steps_list, + brief=semantic_brief, + goal_query=goal_query, + ) + multistage_qa = run_multistage_path_qa( + off_topic_steps=off_topic_steps, + stripped_off_topic=[], + gaps=gaps, + llm_qa=None, + llm_applied=False, + ) + path_qa = build_path_qa_summary( + gaps=gaps, + bridge_inserts=[], + ai_proposals=[], + gap_fill_offers=[], + off_topic_steps=off_topic_steps, + stripped_off_topic=[], + llm_qa=None, + llm_applied=False, + roadmap_qa_mode="roadmap_first_lite" if roadmap_first else None, + multistage_qa=multistage_qa, + ) + if path_qa.get("quality_score") is None: + path_qa["quality_score"] = compute_deterministic_path_quality_score( + gaps=gaps, + off_topic_steps=off_topic_steps, + steps=steps_list, + multistage_qa=multistage_qa, + ) + return path_qa + + +def _off_topic_slot_indices(path_qa: Optional[Mapping[str, Any]]) -> Set[int]: + return set(_off_topic_reasons_by_slot((path_qa or {}).get("off_topic_steps") or []).keys()) + + +def _slot_suggestion_accepted( + *, + baseline_qa: Optional[Mapping[str, Any]], + projected_qa: Optional[Mapping[str, Any]], + baseline_score: Optional[float], + projected_score: Optional[float], + diff: Mapping[str, Any], + off_topic: bool, + major_idx: int, +) -> bool: + """Entscheidet, ob ein Slot-Vorschlag in die Liste kommt.""" + base_id = diff.get("baseline_exercise_id") + prop_id = diff.get("proposed_exercise_id") + base_off = _off_topic_slot_indices(baseline_qa) + proj_off = _off_topic_slot_indices(projected_qa) + + if off_topic and base_id is not None: + if major_idx in base_off and major_idx not in proj_off: + return True + if major_idx in base_off and prop_id is not None: + return _slot_diff_improves_path(diff, _quality_delta(baseline_score, projected_score), off_topic=True) + + if base_id is None and prop_id is not None: + return _slot_diff_improves_path(diff, _quality_delta(baseline_score, projected_score), off_topic=False) + + if base_id is not None and prop_id is not None: + if int(base_id) == int(prop_id): + return False + return _slot_diff_improves_path(diff, _quality_delta(baseline_score, projected_score), off_topic=False) + + if base_id is None and prop_id is None and diff.get("proposed_is_ai_proposal"): + return _slot_diff_improves_path( + diff, + _quality_delta(baseline_score, projected_score), + off_topic=off_topic or major_idx in base_off, + ) + return False + + +def _quality_delta( + baseline_score: Optional[float], + projected_score: Optional[float], +) -> Optional[float]: + if baseline_score is None or projected_score is None: + return None + return round(float(projected_score) - float(baseline_score), 4) + + def _apply_slot_diff_to_steps( baseline_steps: Sequence[Mapping[str, Any]], diff: Mapping[str, Any], @@ -2784,6 +2889,14 @@ def _run_unified_slot_improvement_review( ) baseline_steps = list(qa_pack.get("steps") or baseline_steps) baseline_qa = qa_pack.get("path_qa") if isinstance(qa_pack.get("path_qa"), dict) else {} + if baseline_qa.get("quality_score") is None: + baseline_qa = dict(baseline_qa) + baseline_qa["quality_score"] = compute_deterministic_path_quality_score( + gaps=baseline_qa.get("large_gaps") or [], + off_topic_steps=baseline_qa.get("off_topic_steps") or [], + steps=baseline_steps, + multistage_qa=baseline_qa, + ) baseline_score = _path_qa_quality_score(baseline_qa) gap_fill_offers = list(qa_pack.get("gap_fill_offers") or []) off_topic_map = _off_topic_reasons_by_slot(baseline_qa.get("off_topic_steps") or []) @@ -2794,14 +2907,6 @@ def _run_unified_slot_improvement_review( suggestions: List[Dict[str, Any]] = [] rejected: List[Dict[str, Any]] = [] - scored_eval_body = body.model_copy( - update={ - "include_llm_path_qa": False, - "include_ai_gap_fill": False, - "auto_rematch_after_qa": False, - "include_roadmap_preview": False, - } - ) for step_index, stage_spec in enumerate(roadmap_ctx.stage_specs): major_idx = int(stage_spec.major_step_index) @@ -2856,6 +2961,7 @@ def _run_unified_slot_improvement_review( anchor_variant_id=anchor_variant_id, used=used_other, exclude_exercise_id=exclude_id if not off_topic else int(current_id) if current_id else None, + max_candidates=3, ) accepted_for_slot = False @@ -2882,22 +2988,25 @@ def _run_unified_slot_improvement_review( if int(raw.get("roadmap_major_step_index", -1)) == major_idx: merged_steps[i] = {**raw, **candidate, "roadmap_major_step_index": major_idx} break - eval_res = _evaluate_steps_for_compare_qa( + eval_res = _quick_evaluate_steps_qa( cur, - tenant=tenant, - body=scored_eval_body, + goal_query=goal_query, + semantic_brief=semantic_brief, steps=merged_steps, + roadmap_ctx=roadmap_ctx, ) - projected_qa = ( - eval_res.get("path_qa") - if isinstance(eval_res, dict) and isinstance(eval_res.get("path_qa"), dict) - else None - ) + projected_qa = eval_res if isinstance(eval_res, dict) else None projected_score = _path_qa_quality_score(projected_qa) - delta: Optional[float] = None - if baseline_score is not None and projected_score is not None: - delta = round(projected_score - baseline_score, 4) - improves = _slot_diff_improves_path(diff_stub, delta, off_topic=off_topic) + delta = _quality_delta(baseline_score, projected_score) + improves = _slot_suggestion_accepted( + baseline_qa=baseline_qa, + projected_qa=projected_qa, + baseline_score=baseline_score, + projected_score=projected_score, + diff=diff_stub, + off_topic=off_topic, + major_idx=major_idx, + ) suggestion_type = ( "remove_and_replace" if off_topic and current_id is not None @@ -2990,24 +3099,25 @@ def _run_unified_slot_improvement_review( "proposed_title": ai_step.get("title"), } merged_steps = _apply_slot_diff_to_steps(baseline_steps, diff_stub, [ai_step]) - eval_res = _evaluate_steps_for_compare_qa( + eval_res = _quick_evaluate_steps_qa( cur, - tenant=tenant, - body=scored_eval_body, + goal_query=goal_query, + semantic_brief=semantic_brief, steps=merged_steps, + roadmap_ctx=roadmap_ctx, ) - projected_qa = ( - eval_res.get("path_qa") - if isinstance(eval_res, dict) and isinstance(eval_res.get("path_qa"), dict) - else None - ) + projected_qa = eval_res if isinstance(eval_res, dict) else None projected_score = _path_qa_quality_score(projected_qa) - delta = ( - round(projected_score - baseline_score, 4) - if baseline_score is not None and projected_score is not None - else None + delta = _quality_delta(baseline_score, projected_score) + improves = _slot_suggestion_accepted( + baseline_qa=baseline_qa, + projected_qa=projected_qa, + baseline_score=baseline_score, + projected_score=projected_score, + diff=diff_stub, + off_topic=off_topic or major_idx in _off_topic_slot_indices(baseline_qa), + major_idx=major_idx, ) - improves = _slot_diff_improves_path(diff_stub, delta, off_topic=off_topic or current_id is None) entry = { **diff_stub, "baseline_slot_status": current.get("slot_status"), diff --git a/backend/planning_exercise_path_qa.py b/backend/planning_exercise_path_qa.py index 472ec35..48770a1 100644 --- a/backend/planning_exercise_path_qa.py +++ b/backend/planning_exercise_path_qa.py @@ -745,12 +745,44 @@ def build_path_qa_summary( f"Schritt „{o.get('title')}“ passt nicht zum Pfad-Thema" for o in off_topic ] + summary["quality_score"] = compute_deterministic_path_quality_score( + gaps=gaps, + off_topic_steps=off_topic, + steps=steps, + multistage_qa=multistage_qa, + ) return summary +def compute_deterministic_path_quality_score( + *, + gaps: Sequence[Mapping[str, Any]], + off_topic_steps: Sequence[Mapping[str, Any]], + steps: Optional[Sequence[Mapping[str, Any]]] = None, + multistage_qa: Optional[Mapping[str, Any]] = None, +) -> float: + """Heuristische Pfad-QS ohne LLM — Basis für Slot-Vergleiche.""" + score = 0.92 + score -= 0.08 * len(off_topic_steps or []) + score -= 0.05 * len(gaps or []) + if steps: + empty = sum( + 1 + for s in steps + if isinstance(s, dict) + and s.get("exercise_id") is None + and not s.get("is_ai_proposal") + ) + score -= 0.06 * empty + hint_count = int((multistage_qa or {}).get("optimization_hint_count") or 0) + score -= min(0.14, 0.02 * hint_count) + return max(0.35, min(0.98, round(score, 4))) + + __all__ = [ "apply_llm_path_reorder", "build_path_qa_summary", + "compute_deterministic_path_quality_score", "detect_off_topic_steps", "detect_path_gaps", "is_roadmap_planned_neighbor_pair", diff --git a/backend/tests/test_planning_deterministic_quality_score.py b/backend/tests/test_planning_deterministic_quality_score.py new file mode 100644 index 0000000..f5a0975 --- /dev/null +++ b/backend/tests/test_planning_deterministic_quality_score.py @@ -0,0 +1,21 @@ +"""Deterministische Pfad-QS ohne LLM.""" +from planning_exercise_path_qa import compute_deterministic_path_quality_score + + +def test_deterministic_quality_score_penalizes_off_topic(): + base = compute_deterministic_path_quality_score(gaps=[], off_topic_steps=[]) + with_off = compute_deterministic_path_quality_score( + gaps=[], + off_topic_steps=[{"roadmap_major_step_index": 1}], + ) + assert with_off < base + + +def test_deterministic_quality_score_penalizes_empty_slots(): + base = compute_deterministic_path_quality_score(gaps=[], off_topic_steps=[], steps=[]) + with_empty = compute_deterministic_path_quality_score( + gaps=[], + off_topic_steps=[], + steps=[{"exercise_id": None}, {"exercise_id": 1}], + ) + assert with_empty < base diff --git a/frontend/src/components/ProgressionGraphEditor.jsx b/frontend/src/components/ProgressionGraphEditor.jsx index 4d87c5b..43eddbe 100644 --- a/frontend/src/components/ProgressionGraphEditor.jsx +++ b/frontend/src/components/ProgressionGraphEditor.jsx @@ -500,10 +500,11 @@ export default function ProgressionGraphEditor({ graphId, embedded = false, onSa unified_slot_review: true, baseline_evaluate_steps: slotsToEvaluateSteps(synced), include_llm_intent: false, - include_llm_path_qa: false, auto_rematch_after_qa: false, }) - setPathQa(reviewRes?.path_qa || null) + const qa = reviewRes?.path_qa || null + setPathQa(qa) + setDraft((prev) => (prev ? { ...prev, lastFindings: qa } : prev)) const compareRes = buildProgressionComparePayload(null, reviewRes) setGapFillOffers(mergeGapOffersForDraft(synced, reviewRes, reviewRes))