diff --git a/.claude/docs/working/PLANNING_EXERCISE_SUGGEST_CONTEXT.md b/.claude/docs/working/PLANNING_EXERCISE_SUGGEST_CONTEXT.md index dfcc1aa..3c97f27 100644 --- a/.claude/docs/working/PLANNING_EXERCISE_SUGGEST_CONTEXT.md +++ b/.claude/docs/working/PLANNING_EXERCISE_SUGGEST_CONTEXT.md @@ -2,7 +2,7 @@ **Version:** 0.2 **Datum:** 2026-05-23 -**Status:** P0–P2 ✅ · Phase A/B/B2 ✅ · **Phase C1–C3 ✅** (Progressionsgraph + Varianten + Pfad-Builder) +**Status:** P0–P2 ✅ · Phase A/B/B2 ✅ · **Phase C1–C3 ✅** · **Phase E ✅** (Semantik + Pfad-QA) **Bezüge:** `AI_PLANNING_KI_MULTISTAGE_FORECAST.md` · `AI_PROMPT_TARGET_ARCHITECTURE.md` · `SKILL_SCORING_SPEC.md` · `TRAINING_FRAMEWORK_SPEC.md` §3 (Progressionsgraph) --- @@ -191,6 +191,7 @@ Wenn `hits` leer oder Trainer wählt „Mit KI anlegen“: | **C1** | Graph auto-match + variantenbewusste Nachfolger | ✅ **0.8.183** | | **C2** | Varianten in Trefferliste / Picker | ✅ **0.8.184** | | **C3** | Graph-Builder (Ziel → Pfad → speichern) | ✅ **0.8.185** | +| **E** | Semantik-Schicht + Pfad-QA (Lücken/Brücken/LLM-QS) | ✅ **0.8.186** | | **D** | Neu-Anlage: Pack an `suggestExerciseAi` | 🔲 | --- @@ -446,4 +447,36 @@ Treffer: optional `hits[].suggested_variant_id`. --- -## 22. Backlog (offen) +## 22. Phase E — Semantik-Schicht + Pfad-QA (0.8.186) ✅ + +### Semantic Brief (`planning_exercise_semantics.py`) + +Parallel zum Katalog-Overlay — **nicht ersetzend**: + +| Feld | Bedeutung | +|------|-----------| +| `primary_topic` | z. B. `mae geri` | +| `must_phrases` / `exclude_phrases` | Phrasen-Match in Titel/Ziel/Varianten | +| `development_arc` | einstieg → … → perfektion | +| `semantic_strength` | 0–1 — steuert dynamisches Blend im Hybrid-Score | +| `retrieval_query` | fokussierte Volltext-Query (nicht ganzer Satz) | + +Optional LLM: Prompt `planning_exercise_query_semantics` (Migration **075**). + +**Hybrid-Score:** neuer Term `w_semantic * semantic_score` — Profil/Volltext werden bei hoher `semantic_strength` relativ abgeschwächt. + +### Pfad-QA (`planning_exercise_path_qa.py`) + +Nach Pfad-Bildung: + +1. **Lücken-Messung** zwischen benachbarten Schritten (Skill-Jaccard + Semantik zum erwarteten Phasen-Segment) +2. **Brücken-Übungen** bei großen Lücken (zusätzliche Schritte, markiert `is_bridge`) +3. **LLM-QS** (Prompt `planning_exercise_path_qa`): Reihenfolge, Themen-Abdeckung, Empfehlungen + +**API-Erweiterung** `progression-path-suggest`: `include_path_qa`, `include_llm_path_qa` · Response: `semantic_brief_summary`, `path_qa`. + +**Pfad-Schritte:** Semantic Brief + Entwicklungsphase in **allen** Schritten (nicht nur Schritt 1). + +--- + +## 23. Backlog (offen) diff --git a/backend/migrations/075_ai_prompt_planning_semantics_path_qa.sql b/backend/migrations/075_ai_prompt_planning_semantics_path_qa.sql new file mode 100644 index 0000000..6a1c280 --- /dev/null +++ b/backend/migrations/075_ai_prompt_planning_semantics_path_qa.sql @@ -0,0 +1,89 @@ +-- Migration 075: Planungs-KI Phase E — Semantik-Enrichment + Pfad-QA Prompts + +INSERT INTO ai_prompts ( + slug, display_name, description, template, + category, output_format, output_schema, is_system_default, default_template, active, sort_order +) +SELECT + 'planning_exercise_query_semantics', + 'Planungs-Übungssuche Semantik', + 'Erweitert deterministisches Semantic Brief um must/exclude phrases und Entwicklungsbogen.', + $t$Du bist Assistent für Kampfsport-Trainer bei der semantischen Analyse von Planungs-Anfragen. + +Ziel: JSON für ein Semantic Brief — präzise Kernbegriffe, Ausschlüsse, Entwicklungsbogen. +Nutze das bestehende Brief als Basis; ergänze/verfeinere, ersetze aber keine eindeutige Technik-Identität. + +Anfrage: {{search_query}} +Bestehendes Brief (deterministisch): {{semantic_brief_json}} + +Regeln: +- must_phrases: konkrete Technik-/Themen-Phrasen aus der Anfrage (z. B. "mae geri", nicht nur "geri") +- exclude_phrases: konkurrierende Techniken/Themen, die NICHT gemeint sind +- development_arc: geordnete Phasen aus: einstieg, grundlage, vertiefung, anwendung, perfektion +- semantic_strength: 0.0–1.0 (höher bei spezifischer Technik/Thema) +- primary_topic: Hauptthema in wenigen Worten +- topic_type: technique | focus | method | skill | general + +Antworte NUR mit JSON: +{ + "primary_topic": "Mae Geri", + "topic_type": "technique", + "must_phrases": ["mae geri"], + "exclude_phrases": ["mawashi geri", "sakuto geri"], + "development_arc": ["einstieg", "grundlage", "vertiefung", "perfektion"], + "semantic_strength": 0.9, + "rationale": "Kurz auf Deutsch" +}$t$, + 'training', + 'json', + '{"type":"object","properties":{"must_phrases":{"type":"array"},"exclude_phrases":{"type":"array"},"development_arc":{"type":"array"},"semantic_strength":{"type":"number"}}}'::jsonb, + true, + NULL, + true, + 12 +WHERE NOT EXISTS (SELECT 1 FROM ai_prompts WHERE slug = 'planning_exercise_query_semantics'); + +INSERT INTO ai_prompts ( + slug, display_name, description, template, + category, output_format, output_schema, is_system_default, default_template, active, sort_order +) +SELECT + 'planning_exercise_path_qa', + 'Planungs-Pfad QA', + 'Semantische Qualitätsprüfung eines vorgeschlagenen Übungspfads inkl. Lücken und Brücken.', + $t$Du bist Assistent für Kampfsport-Trainer und prüfst einen vorgeschlagenen Übungspfad. + +Ziel-Anfrage: {{goal_query}} +Semantic Brief: {{semantic_brief_json}} +Schritte (JSON): {{steps_json}} +Erkannte Lücken: {{gaps_json}} +Eingefügte Brücken: {{bridge_inserts_json}} + +Prüfe: +1. Deckt der Pfad das Hauptthema der Anfrage ab (nicht nur Oberbegriffe)? +2. Ist die Reihenfolge didaktisch sinnvoll (Einstieg → Vertiefung → Ziel)? +3. Sind Sprünge zwischen benachbarten Schritten zu groß? +4. Sind Brücken-Übungen sinnvoll oder überflüssig? +5. Fehlen wichtige Zwischenschritte? + +Antworte NUR mit JSON: +{ + "overall_ok": true, + "quality_score": 0.85, + "topic_coverage": "Kurz: wie gut das Hauptthema abgedeckt ist", + "issues": ["…"], + "sequence_notes": ["…"], + "recommendations": ["…"] +}$t$, + 'training', + 'json', + '{"type":"object","required":["overall_ok"],"properties":{"overall_ok":{"type":"boolean"},"quality_score":{"type":"number"},"issues":{"type":"array"},"sequence_notes":{"type":"array"},"recommendations":{"type":"array"}}}'::jsonb, + true, + NULL, + true, + 13 +WHERE NOT EXISTS (SELECT 1 FROM ai_prompts WHERE slug = 'planning_exercise_path_qa'); + +UPDATE ai_prompts SET default_template = template +WHERE slug IN ('planning_exercise_query_semantics', 'planning_exercise_path_qa') + AND (default_template IS NULL OR TRIM(default_template) = ''); diff --git a/backend/planning_exercise_path_builder.py b/backend/planning_exercise_path_builder.py index bce5b6b..dfe44f9 100644 --- a/backend/planning_exercise_path_builder.py +++ b/backend/planning_exercise_path_builder.py @@ -1,18 +1,33 @@ """ -Planungs-KI Phase C3: Pfad-Vorschläge für Progressionsgraphen. +Planungs-KI Phase C3/E: Pfad-Vorschläge für Progressionsgraphen. -Ziel-Freitext → iterative Hybrid-Suche (Schritt 1 mit optional LLM-Profil, Folgeschritte deterministisch). +Ziel-Freitext → semantisch gewichtete Schritte → Lücken/Brücken → optional LLM-QA. """ from __future__ import annotations -from typing import Any, Dict, List, Optional, Set, Tuple +from typing import Any, Callable, Dict, List, Optional, Set, Tuple from fastapi import HTTPException from pydantic import BaseModel, Field from tenant_context import TenantContext, library_content_visibility_sql from planning_exercise_profiles import PlanningTargetProfile +from planning_exercise_path_qa import ( + build_path_qa_summary, + detect_path_gaps, + insert_bridge_exercises, + try_llm_qa_progression_path, +) from planning_exercise_retrieval import run_multistage_planning_retrieval +from planning_exercise_semantics import ( + PlanningSemanticBrief, + apply_dynamic_retrieval_weights, + brief_to_summary_dict, + build_semantic_brief, + step_phase_for_index, + step_retrieval_query, + try_enrich_semantic_brief_with_llm, +) from planning_exercise_target_pipeline import build_planning_target_with_query_pipeline from planning_exercise_progression import apply_progression_context_to_pack from planning_exercise_suggest import ( @@ -30,23 +45,32 @@ class ProgressionPathSuggestRequest(BaseModel): query: str = Field(..., min_length=3, max_length=2000) max_steps: int = Field(default=5, ge=2, le=10) include_llm_intent: bool = True + include_path_qa: bool = True + include_llm_path_qa: bool = True progression_graph_id: Optional[int] = Field(default=None, ge=1) exercise_kind_any: Optional[List[str]] = None -def _pick_next_path_hit( +def _pick_best_path_hit( hits: List[Dict[str, Any]], used_exercise_ids: Set[int], ) -> Optional[Dict[str, Any]]: + best: Optional[Dict[str, Any]] = None + best_key: Tuple[float, float] = (-1.0, -1.0) for hit in hits: eid = int(hit["id"]) if eid in used_exercise_ids: continue - return hit - return None + sem = float(hit.get("semantic_score") or 0.0) + score = float(hit.get("score") or 0.0) + key = (sem, score) + if key > best_key: + best_key = key + best = hit + return best -def _hit_to_path_step(hit: Dict[str, Any]) -> Dict[str, Any]: +def _hit_to_path_step(hit: Dict[str, Any], *, is_bridge: bool = False) -> Dict[str, Any]: raw_vid = hit.get("suggested_variant_id") variant_id: Optional[int] = None if raw_vid is not None: @@ -56,17 +80,21 @@ def _hit_to_path_step(hit: Dict[str, Any]) -> Dict[str, Any]: variant_id = vid except (TypeError, ValueError): variant_id = None - return { + step = { "exercise_id": int(hit["id"]), "variant_id": variant_id, "title": hit.get("title"), "summary": hit.get("summary"), "score": hit.get("score"), + "semantic_score": hit.get("semantic_score"), "reasons": list(hit.get("reasons") or []), "variants": hit.get("variants") or [], "suggested_variant_id": hit.get("suggested_variant_id"), "suggested_variant_name": hit.get("suggested_variant_name"), } + if is_bridge: + step["is_bridge"] = True + return step def _run_path_step_retrieval( @@ -75,13 +103,22 @@ def _run_path_step_retrieval( tenant: TenantContext, goal_query: str, step_index: int, + max_steps: int, planned_ids: List[int], anchor_id: Optional[int], anchor_variant_id: Optional[int], progression_graph_id: Optional[int], include_llm_intent: bool, exercise_kind_any: Optional[List[str]], + semantic_brief: PlanningSemanticBrief, + bridge_mode: bool = False, + step_a: Optional[Dict[str, Any]] = None, + step_b: Optional[Dict[str, Any]] = None, ) -> Tuple[List[Dict[str, Any]], PlanningTargetProfile, Dict[str, Any], str]: + step_query = step_retrieval_query(semantic_brief, goal_query, step_index, max_steps) + if bridge_mode and step_a and step_b: + step_query = f"{semantic_brief.retrieval_query or goal_query} brücke zwischen schritten" + pack: Dict[str, Any] = { "unit_id": None, "unit": { @@ -94,14 +131,17 @@ def _run_path_step_retrieval( "group_name": None, "section_order_index": None, "section_title": None, - "section_guidance_notes": goal_query if step_index == 0 else None, + "section_guidance_notes": goal_query if step_index == 0 and not bridge_mode else step_query, "planned_exercise_ids": list(planned_ids), "anchor_exercise_id": anchor_id, "anchor_title": None, "anchor_skill_ids": sorted(_load_skill_ids_for_exercise(cur, anchor_id)), "group_recent_exercise_ids": [], "context_mode": "progression_path", - "has_planning_reference": bool(planned_ids or anchor_id), + "has_planning_reference": bool(planned_ids or anchor_id or bridge_mode), + "semantic_brief": semantic_brief, + "retrieval_query": step_query, + "path_step_phase": step_phase_for_index(semantic_brief, step_index, max_steps), } pack = apply_progression_context_to_pack( cur, @@ -111,14 +151,12 @@ def _run_path_step_retrieval( anchor_variant_id=anchor_variant_id, ) - if step_index == 0: + if step_index == 0 and not bridge_mode: heuristic_intent = resolve_planning_exercise_intent(goal_query, "free_search") - step_query = goal_query else: heuristic_intent = INTENT_SUGGEST_NEXT - step_query = "nächste sinnvolle übung im pfad" - has_plan_ref = bool(pack.get("has_planning_reference")) or step_index > 0 + has_plan_ref = bool(pack.get("has_planning_reference")) pipeline_context = { "unit_title": None, "group_name": None, @@ -142,14 +180,20 @@ def _run_path_step_retrieval( planned_exercise_ids=pack["planned_exercise_ids"], section_planned_exercise_ids=[], anchor_exercise_id=pack.get("anchor_exercise_id"), - query=goal_query if step_index == 0 else step_query, + query=goal_query if step_index == 0 and not bridge_mode else step_query, heuristic_intent=heuristic_intent, - include_llm_intent=include_llm_intent and step_index == 0, + include_llm_intent=include_llm_intent and step_index == 0 and not bridge_mode, context_summary=pipeline_context, has_planning_reference=has_plan_ref, ) - weights = _intent_weights(intent) + weights = apply_dynamic_retrieval_weights( + _intent_weights(intent), + semantic_brief, + scenario="free_search" if step_index == 0 and not bridge_mode else "progression", + has_planning_reference=has_plan_ref, + ) + profile_id = tenant.profile_id role = tenant.global_role vis_sql, vis_params = library_content_visibility_sql( @@ -163,7 +207,7 @@ def _run_path_step_retrieval( cur, vis_sql=vis_sql, vis_params=vis_params, - query=step_query if step_index > 0 else goal_query, + query=step_query, exercise_kind_any=exercise_kind_any, target=target_profile, intent=intent, @@ -174,6 +218,45 @@ def _run_path_step_retrieval( return hits, target_profile, query_intent_summary, intent +def _make_bridge_search_fn( + cur, + *, + tenant: TenantContext, + goal_query: str, + max_steps: int, + progression_graph_id: Optional[int], + include_llm_intent: bool, + exercise_kind_any: Optional[List[str]], + semantic_brief: PlanningSemanticBrief, + planned_ids: List[int], +) -> Callable[..., List[Dict[str, Any]]]: + def _bridge_search( + step_a: Dict[str, Any], + step_b: Dict[str, Any], + _gap: Dict[str, Any], + ) -> List[Dict[str, Any]]: + hits, _, _, _ = _run_path_step_retrieval( + cur, + tenant=tenant, + goal_query=goal_query, + step_index=1, + max_steps=max_steps, + planned_ids=list(planned_ids) + [int(step_a["exercise_id"])], + anchor_id=int(step_a["exercise_id"]), + anchor_variant_id=step_a.get("variant_id"), + progression_graph_id=progression_graph_id, + include_llm_intent=include_llm_intent, + exercise_kind_any=exercise_kind_any, + semantic_brief=semantic_brief, + bridge_mode=True, + step_a=step_a, + step_b=step_b, + ) + return hits + + return _bridge_search + + def suggest_progression_path( cur, *, @@ -189,6 +272,13 @@ def suggest_progression_path( raise HTTPException(status_code=400, detail="Ziel-Anfrage: mindestens 3 Zeichen") max_steps = int(body.max_steps) + semantic_brief = build_semantic_brief(goal_query) + semantic_llm_applied = False + if body.include_llm_intent and semantic_brief.semantic_strength >= 0.35: + semantic_brief, semantic_llm_applied = try_enrich_semantic_brief_with_llm( + cur, goal_query, semantic_brief + ) + used: Set[int] = set() steps: List[Dict[str, Any]] = [] planned_ids: List[int] = [] @@ -203,17 +293,19 @@ def suggest_progression_path( tenant=tenant, goal_query=goal_query, step_index=step_index, + max_steps=max_steps, planned_ids=planned_ids, anchor_id=anchor_id, anchor_variant_id=anchor_variant_id, progression_graph_id=body.progression_graph_id, include_llm_intent=body.include_llm_intent, exercise_kind_any=body.exercise_kind_any, + semantic_brief=semantic_brief, ) if step_index == 0: first_intent_summary = query_intent_summary - hit = _pick_next_path_hit(hits, used) + hit = _pick_best_path_hit(hits, used) if not hit: break @@ -231,7 +323,56 @@ def suggest_progression_path( detail="Zu wenig passende Übungen für einen Pfad (mindestens 2 Schritte). Ziel präzisieren oder max_steps senken.", ) + gaps: List[Dict[str, Any]] = [] + bridge_inserts: List[Dict[str, Any]] = [] + llm_qa: Optional[Dict[str, Any]] = None + llm_qa_applied = False + + if body.include_path_qa: + gaps = detect_path_gaps(cur, steps, brief=semantic_brief) + if gaps: + bridge_fn = _make_bridge_search_fn( + cur, + tenant=tenant, + goal_query=goal_query, + max_steps=max_steps, + progression_graph_id=body.progression_graph_id, + include_llm_intent=body.include_llm_intent, + exercise_kind_any=body.exercise_kind_any, + semantic_brief=semantic_brief, + planned_ids=planned_ids, + ) + steps, bridge_inserts = insert_bridge_exercises( + cur, + steps, + gaps, + brief=semantic_brief, + bridge_search_fn=bridge_fn, + ) + + if body.include_llm_path_qa: + llm_qa, llm_qa_applied = try_llm_qa_progression_path( + cur, + goal_query=goal_query, + brief=semantic_brief, + steps=steps, + gaps=gaps, + bridge_inserts=bridge_inserts, + ) + + path_qa = build_path_qa_summary( + gaps=gaps, + bridge_inserts=bridge_inserts, + llm_qa=llm_qa, + llm_applied=llm_qa_applied, + ) + target_profile_summary = target_profile.to_summary_dict(cur) if target_profile else None + retrieval_parts = ["profile_v1", "full_library", "path_builder", "semantics"] + if body.include_path_qa: + retrieval_parts.append("path_qa") + if llm_qa_applied: + retrieval_parts.append("llm_path_qa") return { "goal_query": goal_query, @@ -239,14 +380,21 @@ def suggest_progression_path( "steps": steps, "step_count": len(steps), "target_profile_summary": target_profile_summary, + "semantic_brief_summary": brief_to_summary_dict(semantic_brief), + "semantic_llm_applied": semantic_llm_applied, "query_intent_summary": first_intent_summary, "progression_graph_id": body.progression_graph_id, - "retrieval_phase": "profile_v1+full_library+path_builder", + "path_qa": path_qa, + "retrieval_phase": "+".join(retrieval_parts), } __all__ = [ "ProgressionPathSuggestRequest", "suggest_progression_path", + "_pick_best_path_hit", "_pick_next_path_hit", ] + +# Legacy-Alias für Tests +_pick_next_path_hit = _pick_best_path_hit diff --git a/backend/planning_exercise_path_qa.py b/backend/planning_exercise_path_qa.py new file mode 100644 index 0000000..b690cd1 --- /dev/null +++ b/backend/planning_exercise_path_qa.py @@ -0,0 +1,343 @@ +""" +Planungs-KI Phase E: Pfad-QA — Lücken erkennen, Brücken vorschlagen, LLM-Prüfung. +""" +from __future__ import annotations + +import json +import logging +import re +from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Set, Tuple + +from ai_prompt_runtime import AiPromptUnavailableError, load_and_render_ai_prompt +from exercise_ai import strip_html_to_plain +from openrouter_chat import ( + effective_openrouter_model_for_prompt_row, + normalize_openrouter_env, + openrouter_chat_completion, +) + +from planning_exercise_semantics import ( + PlanningSemanticBrief, + brief_to_summary_dict, + score_exercise_semantic_relevance, + step_phase_for_index, +) + +_logger = logging.getLogger("shinkan.planning_exercise_path_qa") + +_GAP_SKILL_THRESHOLD = 0.10 +_GAP_SEMANTIC_THRESHOLD = 0.28 +_LARGE_GAP_SCORE = 0.52 +_MAX_BRIDGE_INSERTS = 4 + + +def _extract_json_object(text: str) -> Dict[str, Any]: + s = (text or "").strip() + if s.startswith("```"): + s = re.sub(r"^```[a-zA-Z0-9]*\s*", "", s) + if s.endswith("```"): + s = s[:-3].strip() + start = s.find("{") + end = s.rfind("}") + if start < 0 or end <= start: + raise ValueError("Kein JSON-Objekt in LLM-Antwort") + obj = json.loads(s[start : end + 1]) + if not isinstance(obj, dict): + raise ValueError("LLM-Antwort ist kein JSON-Objekt") + return obj + + +def _skill_jaccard(a: Set[int], b: Set[int]) -> float: + if not a or not b: + return 0.0 + inter = len(a & b) + union = len(a | b) + return inter / union if union else 0.0 + + +def _load_exercise_skill_ids(cur, exercise_id: int) -> Set[int]: + cur.execute( + "SELECT skill_id FROM exercise_skills WHERE exercise_id = %s", + (int(exercise_id),), + ) + return {int(r["skill_id"]) for r in cur.fetchall() if r.get("skill_id") is not None} + + +def _load_exercise_text_bundle(cur, exercise_id: int) -> Dict[str, Any]: + cur.execute( + "SELECT id, title, summary, goal FROM exercises WHERE id = %s", + (int(exercise_id),), + ) + row = cur.fetchone() + if not row: + return {"title": "", "summary": "", "goal": "", "variant_names": []} + cur.execute( + """ + SELECT variant_name FROM exercise_variants + WHERE exercise_id = %s + ORDER BY sequence_order ASC NULLS LAST, id ASC + LIMIT 8 + """, + (int(exercise_id),), + ) + variants = [str(r.get("variant_name") or "") for r in cur.fetchall()] + return { + "title": str(row.get("title") or ""), + "summary": str(row.get("summary") or ""), + "goal": str(row.get("goal") or ""), + "variant_names": variants, + } + + +def measure_step_transition_gap( + cur, + step_a: Mapping[str, Any], + step_b: Mapping[str, Any], + *, + brief: PlanningSemanticBrief, + segment_index: int, + total_segments: int, +) -> Dict[str, Any]: + eid_a = int(step_a["exercise_id"]) + eid_b = int(step_b["exercise_id"]) + skills_a = _load_exercise_skill_ids(cur, eid_a) + skills_b = _load_exercise_skill_ids(cur, eid_b) + skill_sim = _skill_jaccard(skills_a, skills_b) + + bundle_b = _load_exercise_text_bundle(cur, eid_b) + mid_phase = step_phase_for_index(brief, segment_index + 1, total_segments + 1) + sem_b, sem_reasons = score_exercise_semantic_relevance( + title=bundle_b["title"], + summary=bundle_b["summary"], + goal=bundle_b["goal"], + variant_names=bundle_b["variant_names"], + brief=brief, + step_phase=mid_phase, + ) + + gap_score = 0.0 + if skill_sim < _GAP_SKILL_THRESHOLD: + gap_score += 0.45 * (1.0 - skill_sim / max(_GAP_SKILL_THRESHOLD, 0.01)) + if sem_b < _GAP_SEMANTIC_THRESHOLD: + gap_score += 0.35 * (1.0 - sem_b / max(_GAP_SEMANTIC_THRESHOLD, 0.01)) + if brief.semantic_strength >= 0.5 and sem_b < 0.15: + gap_score += 0.2 + + gap_score = min(1.0, round(gap_score, 4)) + is_large = gap_score >= _LARGE_GAP_SCORE + + return { + "from_exercise_id": eid_a, + "to_exercise_id": eid_b, + "from_title": step_a.get("title"), + "to_title": step_b.get("title"), + "skill_similarity": round(skill_sim, 4), + "semantic_score_to": sem_b, + "gap_score": gap_score, + "is_large_gap": is_large, + "expected_phase": mid_phase, + "reasons": sem_reasons, + } + + +def detect_path_gaps( + cur, + steps: Sequence[Mapping[str, Any]], + *, + brief: PlanningSemanticBrief, +) -> List[Dict[str, Any]]: + if len(steps) < 2: + return [] + gaps: List[Dict[str, Any]] = [] + total_segments = len(steps) - 1 + for i in range(total_segments): + gap = measure_step_transition_gap( + cur, + steps[i], + steps[i + 1], + brief=brief, + segment_index=i, + total_segments=total_segments, + ) + if gap.get("is_large_gap"): + gaps.append(gap) + return gaps + + +def _pick_bridge_hit( + hits: Sequence[Mapping[str, Any]], + *, + used_ids: Set[int], + step_a_id: int, + step_b_id: int, +) -> Optional[Dict[str, Any]]: + for hit in hits: + eid = int(hit["id"]) + if eid in used_ids or eid in {step_a_id, step_b_id}: + continue + return dict(hit) + return None + + +def insert_bridge_exercises( + cur, + steps: List[Dict[str, Any]], + gaps: Sequence[Mapping[str, Any]], + *, + brief: PlanningSemanticBrief, + bridge_search_fn: Callable[..., List[Dict[str, Any]]], + max_inserts: int = _MAX_BRIDGE_INSERTS, +) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: + """ + Fügt zwischen großen Lücken Brücken-Übungen ein. + bridge_search_fn(from_step, to_step, gap) -> hits + """ + if not gaps: + return steps, [] + + used_ids = {int(s["exercise_id"]) for s in steps} + inserts: List[Dict[str, Any]] = [] + out = list(steps) + + gap_by_pair = { + (int(g["from_exercise_id"]), int(g["to_exercise_id"])): g for g in gaps + } + + i = 0 + while i < len(out) - 1 and len(inserts) < max_inserts: + a = out[i] + b = out[i + 1] + key = (int(a["exercise_id"]), int(b["exercise_id"])) + gap = gap_by_pair.get(key) + if not gap: + i += 1 + continue + + hits = bridge_search_fn(a, b, gap) + bridge_hit = _pick_bridge_hit( + hits, + used_ids=used_ids, + step_a_id=int(a["exercise_id"]), + step_b_id=int(b["exercise_id"]), + ) + if not bridge_hit: + i += 1 + continue + + bridge_step = { + "exercise_id": int(bridge_hit["id"]), + "variant_id": bridge_hit.get("suggested_variant_id"), + "title": bridge_hit.get("title"), + "summary": bridge_hit.get("summary"), + "score": bridge_hit.get("score"), + "reasons": list(bridge_hit.get("reasons") or []) + ["Brücken-Übung (Lückenfüller)"], + "variants": bridge_hit.get("variants") or [], + "suggested_variant_id": bridge_hit.get("suggested_variant_id"), + "suggested_variant_name": bridge_hit.get("suggested_variant_name"), + "is_bridge": True, + "bridge_for_gap": { + "from_exercise_id": int(a["exercise_id"]), + "to_exercise_id": int(b["exercise_id"]), + "gap_score": gap.get("gap_score"), + }, + } + out.insert(i + 1, bridge_step) + used_ids.add(int(bridge_step["exercise_id"])) + inserts.append( + { + "inserted_after_index": i, + "bridge_exercise_id": int(bridge_step["exercise_id"]), + "bridge_title": bridge_step.get("title"), + "gap": gap, + } + ) + i += 2 + + return out, inserts + + +def try_llm_qa_progression_path( + cur, + *, + goal_query: str, + brief: PlanningSemanticBrief, + steps: Sequence[Mapping[str, Any]], + gaps: Sequence[Mapping[str, Any]], + bridge_inserts: Sequence[Mapping[str, Any]], +) -> Tuple[Optional[Dict[str, Any]], bool]: + api_key, _ = normalize_openrouter_env() + if not api_key or len(steps) < 2: + return None, False + + step_payload = [] + for idx, step in enumerate(steps): + bundle = _load_exercise_text_bundle(cur, int(step["exercise_id"])) + step_payload.append( + { + "index": idx + 1, + "exercise_id": int(step["exercise_id"]), + "title": step.get("title") or bundle["title"], + "goal": strip_html_to_plain(bundle["goal"], max_len=400), + "is_bridge": bool(step.get("is_bridge")), + "reasons": list(step.get("reasons") or [])[:3], + } + ) + + variables = { + "goal_query": goal_query or "", + "semantic_brief_json": json.dumps(brief_to_summary_dict(brief), ensure_ascii=False), + "steps_json": json.dumps(step_payload, ensure_ascii=False), + "gaps_json": json.dumps(list(gaps), ensure_ascii=False), + "bridge_inserts_json": json.dumps(list(bridge_inserts), ensure_ascii=False), + } + + try: + prow, rendered = load_and_render_ai_prompt(cur, "planning_exercise_path_qa", variables) + model = effective_openrouter_model_for_prompt_row(prow) + raw = openrouter_chat_completion(api_key=api_key, model=model, user_content=rendered.text) + obj = _extract_json_object(raw) + return obj, True + except AiPromptUnavailableError: + return None, False + except Exception as exc: + _logger.warning("Pfad-QA-LLM fehlgeschlagen: %s", exc) + return None, False + + +def build_path_qa_summary( + *, + gaps: Sequence[Mapping[str, Any]], + bridge_inserts: Sequence[Mapping[str, Any]], + llm_qa: Optional[Mapping[str, Any]], + llm_applied: bool, +) -> Dict[str, Any]: + summary: Dict[str, Any] = { + "gap_count": len(gaps), + "large_gaps": list(gaps), + "bridge_insert_count": len(bridge_inserts), + "bridge_inserts": list(bridge_inserts), + "llm_qa_applied": llm_applied, + } + if llm_qa: + summary["overall_ok"] = bool(llm_qa.get("overall_ok", True)) + summary["quality_score"] = llm_qa.get("quality_score") + summary["issues"] = list(llm_qa.get("issues") or []) + summary["sequence_notes"] = list(llm_qa.get("sequence_notes") or []) + summary["topic_coverage"] = llm_qa.get("topic_coverage") + summary["recommendations"] = list(llm_qa.get("recommendations") or []) + else: + summary["overall_ok"] = len(gaps) == 0 + summary["issues"] = [ + f"Lücke zwischen „{g.get('from_title')}“ und „{g.get('to_title')}“ (Score {g.get('gap_score')})" + for g in gaps + ] if gaps else [] + return summary + + +__all__ = [ + "build_path_qa_summary", + "detect_path_gaps", + "insert_bridge_exercises", + "measure_step_transition_gap", + "try_llm_qa_progression_path", +] diff --git a/backend/planning_exercise_retrieval.py b/backend/planning_exercise_retrieval.py index c027299..2194b02 100644 --- a/backend/planning_exercise_retrieval.py +++ b/backend/planning_exercise_retrieval.py @@ -14,6 +14,7 @@ from planning_exercise_profiles import ( load_exercise_match_profiles_bulk, score_exercise_against_target, ) +from planning_exercise_semantics import PlanningSemanticBrief, score_exercise_semantic_relevance _MAX_LIBRARY_ROWS = 8000 _PROFILE_LOAD_BATCH = 400 @@ -133,6 +134,44 @@ def _load_skill_sets_chunked(cur, exercise_ids: Sequence[int], *, batch: int = _ return out +def _load_exercise_goals_chunked(cur, exercise_ids: Sequence[int], *, batch: int = _PROFILE_LOAD_BATCH) -> Dict[int, str]: + ids = sorted({int(x) for x in exercise_ids if int(x) > 0}) + out: Dict[int, str] = {} + if not ids: + return out + for i in range(0, len(ids), batch): + chunk = ids[i : i + batch] + ph = ",".join(["%s"] * len(chunk)) + cur.execute(f"SELECT id, goal FROM exercises WHERE id IN ({ph})", chunk) + for row in cur.fetchall(): + out[int(row["id"])] = str(row.get("goal") or "") + return out + + +def _load_variant_names_chunked(cur, exercise_ids: Sequence[int], *, batch: int = _PROFILE_LOAD_BATCH) -> Dict[int, List[str]]: + ids = sorted({int(x) for x in exercise_ids if int(x) > 0}) + out: Dict[int, List[str]] = {eid: [] for eid in ids} + if not ids: + return out + for i in range(0, len(ids), batch): + chunk = ids[i : i + batch] + ph = ",".join(["%s"] * len(chunk)) + cur.execute( + f""" + SELECT exercise_id, variant_name FROM exercise_variants + WHERE exercise_id IN ({ph}) + ORDER BY sequence_order ASC NULLS LAST, id ASC + """, + chunk, + ) + for row in cur.fetchall(): + eid = int(row["exercise_id"]) + name = str(row.get("variant_name") or "").strip() + if name: + out.setdefault(eid, []).append(name[:80]) + return out + + def rank_visible_library_hits( cur, rows: Sequence[Dict[str, Any]], @@ -151,6 +190,11 @@ def rank_visible_library_hits( anchor_id = pack.get("anchor_exercise_id") progression_notes = pack.get("progression_edge_notes") or {} requires_partner = pack.get("requires_partner") + semantic_brief_raw = pack.get("semantic_brief") + semantic_brief: Optional[PlanningSemanticBrief] = None + if isinstance(semantic_brief_raw, PlanningSemanticBrief): + semantic_brief = semantic_brief_raw + step_phase = pack.get("path_step_phase") last_planned_skills: Set[int] = set() planned_ids = pack.get("planned_exercise_ids") or [] @@ -175,6 +219,11 @@ def rank_visible_library_hits( cand_ids = [int(r["id"]) for r in cand_rows] match_profiles = _load_match_profiles_chunked(cur, cand_ids) skills_by_ex = _load_skill_sets_chunked(cur, cand_ids) + goals_by_ex: Dict[int, str] = {} + variants_by_ex: Dict[int, List[str]] = {} + if semantic_brief and semantic_brief.semantic_strength > 0.05: + goals_by_ex = _load_exercise_goals_chunked(cur, cand_ids) + variants_by_ex = _load_variant_names_chunked(cur, cand_ids) max_ft = 0.0 scored_items: List[Dict[str, Any]] = [] @@ -213,8 +262,21 @@ def rank_visible_library_hits( emp, target, intent=intent ) + semantic_score = 0.0 + semantic_reasons: List[str] = [] + if semantic_brief and semantic_brief.semantic_strength > 0.05: + semantic_score, semantic_reasons = score_exercise_semantic_relevance( + title=str(row.get("title") or ""), + summary=str(row.get("summary") or ""), + goal=goals_by_ex.get(eid, ""), + variant_names=variants_by_ex.get(eid, []), + brief=semantic_brief, + step_phase=step_phase, + ) + score = ( - weights["fulltext"] * ft_norm + weights.get("semantic", 0.0) * semantic_score + + weights["fulltext"] * ft_norm + weights["progression"] * prog_hit + weights["skill"] * skill_sim + weights["plan"] * plan_aff @@ -224,6 +286,10 @@ def rank_visible_library_hits( ) reasons: List[str] = [] + if semantic_score >= 0.35 and semantic_reasons: + for sr in semantic_reasons: + if sr not in reasons: + reasons.append(sr) if query and ft_norm >= 0.35: reasons.append("Volltext-Treffer") if prog_hit > 0: @@ -255,6 +321,7 @@ def rank_visible_library_hits( "focus_area": row.get("primary_focus_name"), "score": round(max(0.0, min(1.0, score)), 4), "reasons": reasons, + "semantic_score": round(semantic_score, 4), } ) succ_variants = pack.get("progression_successor_variants") or {} @@ -283,7 +350,7 @@ def run_multistage_planning_retrieval( cur, vis_sql=vis_sql, vis_params=vis_params, - query=query, + query=pack.get("retrieval_query") or query, exercise_kind_any=exercise_kind_any, ) hits, skills_by_ex = rank_visible_library_hits( diff --git a/backend/planning_exercise_semantics.py b/backend/planning_exercise_semantics.py new file mode 100644 index 0000000..46c2ef1 --- /dev/null +++ b/backend/planning_exercise_semantics.py @@ -0,0 +1,492 @@ +""" +Planungs-KI Phase E: Semantik-Schicht für Anfrage-Verständnis und Retrieval. + +Trennt anfrage-spezifische Semantik (Technik, Phrasen, Entwicklungsbogen) vom +Katalog-Profil-Overlay (Fokus/Skills). Wird in Hybrid-Retrieval und Pfad-QA genutzt. +""" +from __future__ import annotations + +import json +import logging +import re +from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple + +from pydantic import BaseModel, Field, field_validator + +from ai_prompt_runtime import AiPromptUnavailableError, load_and_render_ai_prompt +from exercise_ai import strip_html_to_plain +from openrouter_chat import ( + effective_openrouter_model_for_prompt_row, + normalize_openrouter_env, + openrouter_chat_completion, +) + +_logger = logging.getLogger("shinkan.planning_exercise_semantics") + +_GERI_TECHNIQUES: Tuple[Tuple[str, Tuple[str, ...]], ...] = ( + ("mae geri", ("mawashi geri", "yoko geri", "ushiro geri", "sakuto geri", "mikazuki geri")), + ("mawashi geri", ("mae geri", "yoko geri", "ushiro geri", "sakuto geri")), + ("yoko geri", ("mae geri", "mawashi geri", "ushiro geri", "sakuto geri")), + ("ushiro geri", ("mae geri", "mawashi geri", "yoko geri", "sakuto geri")), + ("sakuto geri", ("mae geri", "mawashi geri", "yoko geri", "mikazuki geri")), + ("mikazuki geri", ("mae geri", "mawashi geri", "sakuto geri")), +) + +_OTHER_TECHNIQUE_PATTERNS: Tuple[Tuple[str, Tuple[str, ...]], ...] = ( + ("oi zuki", ("gyaku zuki", "age uke", "gedan barai")), + ("gyaku zuki", ("oi zuki", "mae geri")), + ("age uke", ("gedan barai", "soto uke")), + ("gedan barai", ("age uke", "soto uke")), +) + +_ARC_PHASES: Tuple[Tuple[str, Tuple[str, ...]], ...] = ( + ("einstieg", ("einstieg", "erlernen", "lernen", "anfänger", "anfaenger", "beginn", "grund")), + ("grundlage", ("grundlage", "fundament", "basis", "basic")), + ("vertiefung", ("vertief", "festigung", "übung", "uebung", "wiederhol")), + ("anwendung", ("anwend", "partner", "kampf", "kumite", "reaktion")), + ("perfektion", ("perfekt", "meisterschaft", "höchst", "hoechst", "kime", "sauber")), +) + +_PHASE_QUERY_HINTS: Dict[str, str] = { + "einstieg": "einstieg grundübung einfach", + "grundlage": "grundtechnik festigung", + "vertiefung": "vertiefung technik übung", + "anwendung": "anwendung partner variante", + "perfektion": "perfektion kontrolle kime höchste stufe", +} + +_QUERY_STOPWORDS = frozenset( + { + "von", + "bis", + "zur", + "zum", + "der", + "die", + "das", + "des", + "den", + "dem", + "ein", + "eine", + "einer", + "eines", + "und", + "oder", + "mit", + "für", + "fuer", + "im", + "in", + "am", + "an", + "auf", + "aus", + "beim", + "nach", + "vor", + "über", + "ueber", + "unter", + "wie", + "was", + "wo", + "wir", + "soll", + "sollen", + "bitte", + "schlage", + "vorschlag", + "übung", + "uebung", + "übungen", + "uebungen", + } +) + + +class PlanningSemanticBrief(BaseModel): + primary_topic: Optional[str] = Field(default=None, max_length=120) + topic_type: str = Field(default="general", max_length=40) + must_phrases: List[str] = Field(default_factory=list) + exclude_phrases: List[str] = Field(default_factory=list) + development_arc: List[str] = Field(default_factory=list) + retrieval_query: str = Field(default="", max_length=500) + semantic_strength: float = Field(default=0.0, ge=0.0, le=1.0) + rationale: Optional[str] = Field(default=None, max_length=400) + + @field_validator("topic_type") + @classmethod + def _topic_type(cls, v: str) -> str: + s = (v or "general").strip().lower() + return s if s in {"general", "technique", "focus", "method", "skill"} else "general" + + @field_validator("must_phrases", "exclude_phrases", "development_arc", mode="before") + @classmethod + def _norm_phrase_list(cls, v: Any) -> List[str]: + if not v: + return [] + if isinstance(v, str): + s = _normalize_phrase(v) + return [s] if s else [] + out: List[str] = [] + for item in v: + s = _normalize_phrase(str(item or "")) + if s and s not in out: + out.append(s[:120]) + return out[:12] + + +def _normalize_phrase(text: str) -> str: + return re.sub(r"\s+", " ", (text or "").strip().lower()) + + +def _normalize_query(text: str) -> str: + return re.sub(r"\s+", " ", (text or "").strip()) + + +def _extract_json_object(text: str) -> Dict[str, Any]: + s = (text or "").strip() + if s.startswith("```"): + s = re.sub(r"^```[a-zA-Z0-9]*\s*", "", s) + if s.endswith("```"): + s = s[:-3].strip() + start = s.find("{") + end = s.rfind("}") + if start < 0 or end <= start: + raise ValueError("Kein JSON-Objekt in LLM-Antwort") + obj = json.loads(s[start : end + 1]) + if not isinstance(obj, dict): + raise ValueError("LLM-Antwort ist kein JSON-Objekt") + return obj + + +def _find_technique_in_text(q_lower: str) -> Optional[Tuple[str, Tuple[str, ...]]]: + for primary, excludes in _GERI_TECHNIQUES + _OTHER_TECHNIQUE_PATTERNS: + if primary in q_lower: + return primary, excludes + return None + + +def _detect_development_arc(q_lower: str) -> List[str]: + found: List[str] = [] + for phase, markers in _ARC_PHASES: + if any(m in q_lower for m in markers): + if phase not in found: + found.append(phase) + if not found and ("von" in q_lower and "bis" in q_lower): + found = ["einstieg", "perfektion"] + return found + + +def _keyword_phrases_from_query(query: str) -> List[str]: + q = _normalize_query(query).lower() + tokens = re.findall(r"[a-zäöüß]{3,}", q, flags=re.IGNORECASE) + phrases: List[str] = [] + for i, tok in enumerate(tokens): + low = tok.lower() + if low in _QUERY_STOPWORDS: + continue + if i + 1 < len(tokens): + nxt = tokens[i + 1].lower() + if nxt not in _QUERY_STOPWORDS: + pair = _normalize_phrase(f"{low} {nxt}") + if len(pair) >= 5 and pair not in phrases: + phrases.append(pair) + if len(low) >= 4 and low not in phrases: + phrases.append(low) + return phrases[:6] + + +def build_semantic_brief(query: Optional[str]) -> PlanningSemanticBrief: + """Deterministisches Anfrage-Verständnis — ohne LLM.""" + q = _normalize_query(query) + if not q: + return PlanningSemanticBrief(retrieval_query="", semantic_strength=0.0) + + q_lower = q.lower() + must: List[str] = [] + exclude: List[str] = [] + topic_type = "general" + primary: Optional[str] = None + strength = 0.25 + + technique = _find_technique_in_text(q_lower) + if technique: + primary, ex = technique + must.append(primary) + exclude.extend(list(ex)) + topic_type = "technique" + strength = max(strength, 0.82) + + arc = _detect_development_arc(q_lower) + if arc: + strength = max(strength, 0.55 if technique else 0.45) + + extra_phrases = _keyword_phrases_from_query(q) + for ph in extra_phrases: + if ph not in must and not any(ph in m or m in ph for m in must): + if len(ph) >= 5: + must.append(ph) + + if len(q) >= 24 and not technique: + strength = max(strength, 0.4) + + retrieval = " ".join(must[:4]) if must else q + if arc and primary: + retrieval = f"{primary} {' '.join(arc[:2])}" + + return PlanningSemanticBrief( + primary_topic=primary, + topic_type=topic_type, + must_phrases=must[:8], + exclude_phrases=exclude[:10], + development_arc=arc[:5], + retrieval_query=retrieval[:500], + semantic_strength=min(1.0, round(strength, 3)), + rationale=None, + ) + + +def merge_semantic_brief_llm( + base: PlanningSemanticBrief, + llm_obj: Mapping[str, Any], +) -> PlanningSemanticBrief: + """LLM-Enrichment in deterministisches Brief mergen (LLM ergänzt, ersetzt nicht harte Technik-Regeln).""" + data = base.model_dump() + for key in ("primary_topic", "topic_type", "rationale"): + val = llm_obj.get(key) + if val: + data[key] = val + + for key in ("must_phrases", "exclude_phrases", "development_arc"): + extra = llm_obj.get(key) or [] + merged = list(data.get(key) or []) + for item in extra: + s = _normalize_phrase(str(item or "")) + if s and s not in merged: + merged.append(s) + data[key] = merged[:12] + + llm_strength = llm_obj.get("semantic_strength") + if llm_strength is not None: + try: + data["semantic_strength"] = min( + 1.0, + max(float(data["semantic_strength"]), float(llm_strength)), + ) + except (TypeError, ValueError): + pass + + if data.get("must_phrases"): + data["retrieval_query"] = " ".join(data["must_phrases"][:4])[:500] + out = PlanningSemanticBrief.model_validate(data) + if out.primary_topic and out.topic_type == "general": + out = out.model_copy(update={"topic_type": "technique"}) + return out + + +def try_enrich_semantic_brief_with_llm( + cur, + query: str, + base: PlanningSemanticBrief, +) -> Tuple[PlanningSemanticBrief, bool]: + api_key, _ = normalize_openrouter_env() + if not api_key or base.semantic_strength < 0.35: + return base, False + if not (query or "").strip(): + return base, False + + variables = { + "search_query": (query or "").strip(), + "semantic_brief_json": json.dumps(brief_to_summary_dict(base), ensure_ascii=False), + } + try: + prow, rendered = load_and_render_ai_prompt(cur, "planning_exercise_query_semantics", variables) + model = effective_openrouter_model_for_prompt_row(prow) + raw = openrouter_chat_completion(api_key=api_key, model=model, user_content=rendered.text) + obj = _extract_json_object(raw) + return merge_semantic_brief_llm(base, obj), True + except AiPromptUnavailableError: + return base, False + except Exception as exc: + _logger.warning("Semantik-LLM fehlgeschlagen: %s", exc) + return base, False + + +def brief_to_summary_dict(brief: PlanningSemanticBrief) -> Dict[str, Any]: + return { + "primary_topic": brief.primary_topic, + "topic_type": brief.topic_type, + "must_phrases": list(brief.must_phrases), + "exclude_phrases": list(brief.exclude_phrases), + "development_arc": list(brief.development_arc), + "retrieval_query": brief.retrieval_query, + "semantic_strength": brief.semantic_strength, + "rationale": brief.rationale, + } + + +def step_phase_for_index(brief: PlanningSemanticBrief, step_index: int, max_steps: int) -> Optional[str]: + arc = list(brief.development_arc or []) + if not arc: + if max_steps <= 1: + return None + default_arc = ["einstieg", "grundlage", "vertiefung", "anwendung", "perfektion"] + arc = default_arc[:max_steps] if brief.semantic_strength >= 0.5 else [] + if not arc: + return None + if len(arc) == 1: + return arc[0] + pos = step_index / max(max_steps - 1, 1) + idx = min(len(arc) - 1, int(round(pos * (len(arc) - 1)))) + return arc[idx] + + +def step_retrieval_query( + brief: PlanningSemanticBrief, + goal_query: str, + step_index: int, + max_steps: int, +) -> str: + phase = step_phase_for_index(brief, step_index, max_steps) + parts: List[str] = [] + if brief.retrieval_query: + parts.append(brief.retrieval_query) + elif goal_query: + parts.append(goal_query) + if brief.primary_topic and brief.primary_topic not in " ".join(parts).lower(): + parts.append(brief.primary_topic) + if phase: + hint = _PHASE_QUERY_HINTS.get(phase, phase) + parts.append(hint) + return _normalize_query(" ".join(parts)) or _normalize_query(goal_query) + + +def apply_dynamic_retrieval_weights( + base_weights: Mapping[str, float], + brief: PlanningSemanticBrief, + *, + scenario: str, + has_planning_reference: bool, +) -> Dict[str, float]: + """Semantik-Kanal dynamisch gegen Profil/Plan abwägen.""" + out = dict(base_weights) + sem = float(brief.semantic_strength or 0.0) + if sem <= 0.05: + out.setdefault("semantic", 0.0) + return out + + query_driven = scenario == "free_search" or not has_planning_reference + sem_weight = 0.12 + sem * (0.38 if query_driven else 0.22) + out["semantic"] = round(sem_weight, 4) + + if query_driven: + scale = 1.0 - sem * 0.35 + out["fulltext"] = round(float(out.get("fulltext", 0.18)) * scale, 4) + out["profile"] = round(float(out.get("profile", 0.22)) * (1.0 - sem * 0.25), 4) + else: + out["fulltext"] = round(float(out.get("fulltext", 0.18)) * (1.0 - sem * 0.15), 4) + + total = sum(v for k, v in out.items() if k not in {"repeat_unit", "repeat_group"} and v > 0) + if total > 0.92: + factor = 0.88 / total + for k in list(out.keys()): + if k in {"repeat_unit", "repeat_group"}: + continue + if out[k] > 0: + out[k] = round(out[k] * factor, 4) + return out + + +def _blob_from_fields( + title: str, + summary: str, + goal: str, + variant_names: Sequence[str], +) -> str: + parts = [title or "", strip_html_to_plain(summary, max_len=600), strip_html_to_plain(goal, max_len=800)] + parts.extend(variant_names or []) + return " ".join(p for p in parts if p).lower() + + +def _phrase_in_blob(phrase: str, blob: str) -> bool: + ph = _normalize_phrase(phrase) + if not ph or not blob: + return False + if ph in blob: + return True + if " " not in ph: + return bool(re.search(rf"\b{re.escape(ph)}\b", blob)) + return ph in blob + + +def score_exercise_semantic_relevance( + *, + title: str, + summary: str, + goal: str, + variant_names: Sequence[str], + brief: PlanningSemanticBrief, + step_phase: Optional[str] = None, +) -> Tuple[float, List[str]]: + if brief.semantic_strength <= 0.05: + return 0.0, [] + + blob = _blob_from_fields(title, summary, goal, variant_names) + if not blob.strip(): + return 0.0, [] + + reasons: List[str] = [] + must = list(brief.must_phrases or []) + exclude = list(brief.exclude_phrases or []) + + must_hits = sum(1 for ph in must if _phrase_in_blob(ph, blob)) + exclude_hits = sum(1 for ph in exclude if _phrase_in_blob(ph, blob)) + + score = 0.0 + if must: + must_ratio = must_hits / len(must) + score += 0.55 * must_ratio + if must_hits == len(must): + reasons.append("Alle Kernbegriffe der Anfrage im Übungstext") + elif must_hits > 0: + reasons.append("Teilweise passende Kernbegriffe") + elif brief.primary_topic and _phrase_in_blob(brief.primary_topic, blob): + score += 0.45 + reasons.append(f"Thema „{brief.primary_topic}“ im Übungstext") + elif brief.primary_topic and _phrase_in_blob(brief.primary_topic, blob): + score += 0.5 + reasons.append(f"Thema „{brief.primary_topic}“ im Übungstext") + + if exclude_hits > 0: + penalty = min(0.55, 0.18 * exclude_hits) + if must_hits == 0 or exclude_hits >= must_hits: + score -= penalty + reasons.append("Enthält ausgeschlossene Nebenthemen") + + if step_phase and step_phase in _PHASE_QUERY_HINTS: + phase_markers = next((markers for phase, markers in _ARC_PHASES if phase == step_phase), ()) + if any(m in blob for m in phase_markers) or step_phase in blob: + score += 0.12 + reasons.append(f"Passt zur Pfad-Phase „{step_phase}“") + + if brief.development_arc and not step_phase: + arc_hits = sum(1 for phase in brief.development_arc if phase in blob) + if arc_hits: + score += min(0.15, 0.05 * arc_hits) + + return max(0.0, min(1.0, round(score, 4))), reasons[:4] + + +__all__ = [ + "PlanningSemanticBrief", + "apply_dynamic_retrieval_weights", + "brief_to_summary_dict", + "build_semantic_brief", + "merge_semantic_brief_llm", + "score_exercise_semantic_relevance", + "step_phase_for_index", + "step_retrieval_query", + "try_enrich_semantic_brief_with_llm", +] diff --git a/backend/planning_exercise_suggest.py b/backend/planning_exercise_suggest.py index 436c2c8..2a791ff 100644 --- a/backend/planning_exercise_suggest.py +++ b/backend/planning_exercise_suggest.py @@ -21,6 +21,14 @@ from planning_exercise_target_pipeline import ( compose_retrieval_phase, should_run_llm_rank_pipeline, ) +from planning_exercise_semantics import ( + PlanningSemanticBrief, + apply_dynamic_retrieval_weights, + brief_to_summary_dict, + build_semantic_brief, + step_retrieval_query, + try_enrich_semantic_brief_with_llm, +) # Planungs-Berechtigung + Sektionen (bestehende Implementierung) from routers.training_planning import ( @@ -89,6 +97,7 @@ def resolve_planning_exercise_intent(query: Optional[str], intent_hint: Optional def _intent_weights(intent: str) -> Dict[str, float]: base = { "fulltext": 0.18, + "semantic": 0.0, "progression": 0.18, "skill": 0.12, "plan": 0.08, @@ -714,12 +723,25 @@ def suggest_planning_exercises( context_summary=pipeline_context, has_planning_reference=has_plan_ref, ) - weights = _intent_weights(intent) target_profile_summary = target_profile.to_summary_dict(cur) query_intent_applied = bool(query_intent_summary.get("llm_applied")) llm_expectation_applied = bool(query_intent_summary.get("llm_expectation_applied")) profile_llm_applied = bool(query_intent_summary.get("profile_llm_applied")) + semantic_brief = build_semantic_brief(query) + semantic_llm_applied = False + if body.include_llm_intent and semantic_brief.semantic_strength >= 0.35: + semantic_brief, semantic_llm_applied = try_enrich_semantic_brief_with_llm( + cur, query, semantic_brief + ) + + weights = apply_dynamic_retrieval_weights( + _intent_weights(intent), + semantic_brief, + scenario=scenario_kind, + has_planning_reference=has_plan_ref, + ) + profile_id = tenant.profile_id role = tenant.global_role vis_sql, vis_params = library_content_visibility_sql( @@ -741,6 +763,8 @@ def suggest_planning_exercises( pack={ **pack, "requires_partner": query_intent_summary.get("requires_partner"), + "semantic_brief": semantic_brief, + "retrieval_query": semantic_brief.retrieval_query or query, }, ) @@ -755,6 +779,7 @@ def suggest_planning_exercises( query_intent=query_intent_applied, llm_expectation=llm_expectation_applied, llm_rank=False, + semantics=semantic_brief.semantic_strength >= 0.35, ) run_llm_rank = should_run_llm_rank_pipeline( query, @@ -792,6 +817,7 @@ def suggest_planning_exercises( query_intent=query_intent_applied, llm_expectation=llm_expectation_applied, llm_rank=True, + semantics=semantic_brief.semantic_strength >= 0.35, ) tail = hits[pre_limit:] hits = pool_hits + tail @@ -837,6 +863,8 @@ def suggest_planning_exercises( "llm_intent_applied": query_intent_applied, "llm_expectation_applied": llm_expectation_applied, "profile_llm_applied": profile_llm_applied, + "semantic_brief_summary": brief_to_summary_dict(semantic_brief), + "semantic_llm_applied": semantic_llm_applied, "intent_resolved": intent, "intent_heuristic": heuristic_intent, "query_normalized": query or None, diff --git a/backend/planning_exercise_target_pipeline.py b/backend/planning_exercise_target_pipeline.py index 3846c34..8293d52 100644 --- a/backend/planning_exercise_target_pipeline.py +++ b/backend/planning_exercise_target_pipeline.py @@ -430,12 +430,15 @@ def compose_retrieval_phase( query_intent: bool = False, llm_expectation: bool = False, llm_rank: bool = False, + semantics: bool = False, ) -> str: parts = ["profile_v1"] if full_library or profile_preselect: parts.append("full_library") if text_signals: parts.append("text_signals") + if semantics: + parts.append("semantics") if llm_expectation: parts.append("llm_expectation") elif query_intent: diff --git a/backend/tests/test_planning_exercise_path_builder.py b/backend/tests/test_planning_exercise_path_builder.py index 2d45692..12a2a3d 100644 --- a/backend/tests/test_planning_exercise_path_builder.py +++ b/backend/tests/test_planning_exercise_path_builder.py @@ -1,11 +1,11 @@ -"""Tests Planungs-KI Phase C3 — Pfad-Vorschläge.""" -from planning_exercise_path_builder import _pick_next_path_hit, _hit_to_path_step +"""Tests Planungs-KI Phase C3/E — Pfad-Vorschläge.""" +from planning_exercise_path_builder import _pick_best_path_hit, _hit_to_path_step def test_pick_next_path_hit_skips_used(): - hits = [{"id": 1, "title": "A"}, {"id": 2, "title": "B"}, {"id": 3, "title": "C"}] - assert _pick_next_path_hit(hits, {1})["id"] == 2 - assert _pick_next_path_hit(hits, {1, 2, 3}) is None + hits = [{"id": 1, "title": "A", "semantic_score": 0.2}, {"id": 2, "title": "B", "semantic_score": 0.2}, {"id": 3, "title": "C", "semantic_score": 0.2}] + assert _pick_best_path_hit(hits, {1})["id"] == 2 + assert _pick_best_path_hit(hits, {1, 2, 3}) is None def test_hit_to_path_step_maps_variant(): diff --git a/backend/tests/test_planning_exercise_path_qa.py b/backend/tests/test_planning_exercise_path_qa.py new file mode 100644 index 0000000..1133db8 --- /dev/null +++ b/backend/tests/test_planning_exercise_path_qa.py @@ -0,0 +1,16 @@ +"""Tests Planungs-KI Phase E — Pfad-QA.""" +from planning_exercise_path_builder import _pick_best_path_hit + + +def test_pick_best_path_hit_prefers_semantic_score(): + hits = [ + {"id": 1, "title": "Mawashi", "score": 0.9, "semantic_score": 0.1}, + {"id": 2, "title": "Mae Geri", "score": 0.75, "semantic_score": 0.85}, + ] + chosen = _pick_best_path_hit(hits, set()) + assert chosen["id"] == 2 + + +def test_pick_best_path_hit_skips_used(): + hits = [{"id": 1, "title": "A", "score": 0.5, "semantic_score": 0.5}] + assert _pick_best_path_hit(hits, {1}) is None diff --git a/backend/tests/test_planning_exercise_semantics.py b/backend/tests/test_planning_exercise_semantics.py new file mode 100644 index 0000000..6337204 --- /dev/null +++ b/backend/tests/test_planning_exercise_semantics.py @@ -0,0 +1,67 @@ +"""Tests Planungs-KI Phase E — Semantik-Schicht.""" +from planning_exercise_semantics import ( + apply_dynamic_retrieval_weights, + build_semantic_brief, + score_exercise_semantic_relevance, + step_retrieval_query, +) + + +def test_build_semantic_brief_mae_geri(): + brief = build_semantic_brief( + "Von Erlernen bis zur Perfektion, des Fußtritts Mae Geri" + ) + assert brief.primary_topic == "mae geri" + assert "mae geri" in brief.must_phrases + assert "mawashi geri" in brief.exclude_phrases + assert brief.semantic_strength >= 0.8 + assert "einstieg" in brief.development_arc or "perfektion" in brief.development_arc + + +def test_semantic_score_prefers_mae_over_mawashi(): + brief = build_semantic_brief("Mae Geri Perfektion") + mae_score, _ = score_exercise_semantic_relevance( + title="Mae Geri — Frontkick Grundstellung", + summary="Frontkick von vorn", + goal="Sauberer Mae Geri", + variant_names=[], + brief=brief, + ) + mawashi_score, _ = score_exercise_semantic_relevance( + title="Mawashi Geri — Rundkick", + summary="Rundkick Technik", + goal="Mawashi Geri Höhe", + variant_names=[], + brief=brief, + ) + assert mae_score > mawashi_score + + +def test_dynamic_weights_boost_semantic_for_query_only(): + brief = build_semantic_brief("Mae Geri bis Perfektion") + base = { + "fulltext": 0.45, + "semantic": 0.0, + "progression": 0.08, + "skill": 0.08, + "plan": 0.08, + "profile": 0.15, + "repeat_unit": -0.3, + "repeat_group": -0.15, + } + out = apply_dynamic_retrieval_weights( + base, + brief, + scenario="free_search", + has_planning_reference=False, + ) + assert out["semantic"] > 0.25 + assert out["fulltext"] < base["fulltext"] + + +def test_step_retrieval_query_carries_topic_and_phase(): + brief = build_semantic_brief("Mae Geri von Einstieg bis Perfektion") + q0 = step_retrieval_query(brief, brief.retrieval_query, 0, 5) + q4 = step_retrieval_query(brief, brief.retrieval_query, 4, 5) + assert "mae geri" in q0.lower() + assert q0 != q4 diff --git a/backend/version.py b/backend/version.py index 7ae0322..e826e46 100644 --- a/backend/version.py +++ b/backend/version.py @@ -1,6 +1,6 @@ # Shinkan Jinkendo Version Information -APP_VERSION = "0.8.185" +APP_VERSION = "0.8.186" BUILD_DATE = "2026-05-23" DB_SCHEMA_VERSION = "20260531074" @@ -29,7 +29,7 @@ MODULE_VERSIONS = { "skill_profiles": "1.0.0", # Phase 3: gewichtetes Fähigkeiten-Profil + skill-discovery/suggestions "methods": "0.1.0", "exercises": "2.37.0", # Planungs-KI P1: Szenario-Pipeline + Query-Intent-Overlay - "planning_exercise_suggest": "0.13.0", # Phase C3: progression-path-suggest für Graph-Builder + "planning_exercise_suggest": "0.14.0", # Phase E: Semantik-Schicht + Pfad-QA "training_units": "0.4.0", # POST .../publish-to-framework: Ablauf aus geplanter Einheit → Rahmen-Slot-Blueprint "training_programs": "0.1.0", "planning": "0.15.0", # Vorlagen: Strukturvorschau, Bearbeiten inkl. Split-Sessions + Beschreibung @@ -44,6 +44,14 @@ MODULE_VERSIONS = { } CHANGELOG = [ + { + "version": "0.8.186", + "date": "2026-05-23", + "changes": [ + "Planungs-KI Phase E: Semantic Brief (Technik-Phrasen, Ausschlüsse, Entwicklungsbogen) im Hybrid-Retrieval.", + "Pfad-Builder: semantische Schritte, Lücken-Erkennung, Brücken-Übungen, optional LLM-Pfad-QS.", + ], + }, { "version": "0.8.185", "date": "2026-05-23", diff --git a/docs/HANDOVER.md b/docs/HANDOVER.md index 907bb66..d308727 100644 --- a/docs/HANDOVER.md +++ b/docs/HANDOVER.md @@ -1,7 +1,7 @@ # Shinkan Jinkendo – Entwicklungsstand & Handover **Stand:** 2026-05-23 -**App-Version / DB-Schema:** App **`0.8.185`** (Planungs-KI Phase C3); DB **`20260531074`** — maßgeblich **`backend/version.py`**. +**App-Version / DB-Schema:** App **`0.8.186`** (Planungs-KI Phase E Semantik + Pfad-QA); DB **`20260531074`** — maßgeblich **`backend/version.py`**. Diese Datei ist die **Einstiegs-Doku für neue Chat-Sessions**: Anforderungen im Detail stehen in `.claude/docs/` (siehe unten); hier der **implementierte Stand**, **Medien-Meilenstein** und **sinnvolle nächste Schritte**. @@ -104,6 +104,7 @@ Das Schema ist gegenüber dem Code zurück: Migration **`022_skills_schema_compl | **C1** | Progressionsgraph auto-match + variantenbewusste Nachfolger | ✅ **0.8.183** | | **C2** | Varianten in Trefferliste / Picker-Auswahl | ✅ **0.8.184** | | **C3** | Graph-Builder: Ziel → Pfad vorschlagen → in Graph speichern | ✅ **0.8.185** | +| **E** | Semantik-Schicht (Brief, Phrasen-Score) + Pfad-QA (Lücken, Brücken, LLM-QS) | ✅ **0.8.186** | | **D** | `planning_context` an `suggestExerciseAi` (Neu-Anlage) | 🔲 | **Backend:** `planning_exercise_suggest.py`, `planning_exercise_retrieval.py`, `planning_exercise_profiles.py`, `planning_exercise_target_pipeline.py`, `planning_exercise_progression.py` · Router `POST /api/planning/exercise-suggest` @@ -248,9 +249,9 @@ Das Schema ist gegenüber dem Code zurück: Migration **`022_skills_schema_compl ### Planungs-KI (priorisiert) 1. **Graph-Auswahl UI:** Dropdown neben Auto-Match; Rahmen-Slot mit Default-Graph verknüpfen. -2. **Enrichment:** Skills für Kern-Übungen nachziehen (sonst schwaches Profil-Ranking). +2. **Enrichment:** Skills/Tags pro Technik (Feinauflösung statt nur Geri Waza). 3. **D — Neu-Anlage:** `planning_context_json` an `POST /api/exercises/ai/suggest`. -4. **C3 Feinschliff:** Einzelschritte im Pfad manuell ersetzen (Picker); Pfad an bestehende Kette anhängen. +4. **E Feinschliff:** Pfad-QA → automatische Neuordnung; fehlende Schritte als KI-Neuanlage vorschlagen. ### Allgemein diff --git a/frontend/src/components/ExerciseProgressionPathBuilder.jsx b/frontend/src/components/ExerciseProgressionPathBuilder.jsx index 90ae429..e8548a5 100644 --- a/frontend/src/components/ExerciseProgressionPathBuilder.jsx +++ b/frontend/src/components/ExerciseProgressionPathBuilder.jsx @@ -19,6 +19,8 @@ function mapApiStepToRow(step) { variantId, variants, reasons: Array.isArray(step?.reasons) ? step.reasons : [], + isBridge: Boolean(step?.is_bridge), + semanticScore: step?.semantic_score, } } @@ -34,6 +36,8 @@ export default function ExerciseProgressionPathBuilder({ const [saving, setSaving] = useState(false) const [error, setError] = useState('') const [targetSummary, setTargetSummary] = useState(null) + const [semanticBrief, setSemanticBrief] = useState(null) + const [pathQa, setPathQa] = useState(null) const [pathSteps, setPathSteps] = useState([]) const patchStep = useCallback((idx, patch) => { @@ -73,6 +77,8 @@ export default function ExerciseProgressionPathBuilder({ query: q, max_steps: Number(maxSteps), include_llm_intent: true, + include_path_qa: true, + include_llm_path_qa: true, progression_graph_id: Number(graphId), }) const rows = (Array.isArray(res?.steps) ? res.steps : []).map(mapApiStepToRow) @@ -81,12 +87,16 @@ export default function ExerciseProgressionPathBuilder({ } setPathSteps(rows) setTargetSummary(res?.target_profile_summary || null) + setSemanticBrief(res?.semantic_brief_summary || null) + setPathQa(res?.path_qa || null) if (!segmentNotes.trim() && q) setSegmentNotes(q.slice(0, 400)) } catch (e) { console.error(e) setError(e.message || 'Pfad-Vorschlag fehlgeschlagen') setPathSteps([]) setTargetSummary(null) + setSemanticBrief(null) + setPathQa(null) } finally { setLoading(false) } @@ -122,6 +132,8 @@ export default function ExerciseProgressionPathBuilder({ }) setPathSteps([]) setTargetSummary(null) + setSemanticBrief(null) + setPathQa(null) if (typeof onSaved === 'function') await onSaved() alert(`${n} Nachfolger-Kante(n) aus KI-Pfad gespeichert.`) } catch (e) { @@ -142,8 +154,8 @@ export default function ExerciseProgressionPathBuilder({ >

KI: Pfad zum Ziel

- Ziel in Freitext formulieren — die Planungs-KI schlägt eine aufbauende Übungsreihe vor. Nach Review als - Nachfolger-Ketten in den aktiven Graph speichern (über mehrere Trainingspläne hinweg nutzbar). + Ziel in Freitext formulieren — die Planungs-KI schlägt eine semantisch passende, aufbauende Reihenfolge vor, + prüft Lücken (ggf. Brücken-Übungen) und optional per LLM-QS. Nach Review in den Graph speichern.

@@ -184,15 +196,59 @@ export default function ExerciseProgressionPathBuilder({

) : null} - {targetSummary && pathSteps.length > 0 ? ( + {(semanticBrief || targetSummary) && pathSteps.length > 0 ? (
- {Array.isArray(targetSummary.focus_areas) && - targetSummary.focus_areas.slice(0, 2).map((fa) => ( + {semanticBrief?.primary_topic ? ( + + Thema: {semanticBrief.primary_topic} + + ) : null} + {Array.isArray(semanticBrief?.development_arc) && + semanticBrief.development_arc.slice(0, 3).map((phase) => ( + + {phase} + + ))} + {Array.isArray(targetSummary?.focus_areas) && + targetSummary.focus_areas.slice(0, 1).map((fa) => ( Fokus: {fa} ))} - {Array.isArray(targetSummary.top_skills) && +
+ ) : null} + + {pathQa && pathSteps.length > 0 ? ( +
+ + Pfad-QS: {pathQa.overall_ok ? 'OK' : 'Hinweise'} + {pathQa.quality_score != null ? ` (${Math.round(Number(pathQa.quality_score) * 100)} %)` : ''} + + {pathQa.topic_coverage ? ( +

{pathQa.topic_coverage}

+ ) : null} + {Array.isArray(pathQa.issues) && pathQa.issues.length > 0 ? ( +
    + {pathQa.issues.slice(0, 4).map((issue) => ( +
  • {issue}
  • + ))} +
+ ) : null} + {Number(pathQa.bridge_insert_count) > 0 ? ( +

+ {pathQa.bridge_insert_count} Brücken-Übung(en) eingefügt (Lückenfüller). +

+ ) : null} + {Array.isArray(targetSummary?.top_skills) && targetSummary.top_skills.slice(0, 2).map((sk) => ( {sk.name} @@ -220,6 +276,7 @@ export default function ExerciseProgressionPathBuilder({