All checks were successful
Deploy Development / deploy (push) Successful in 42s
Test Suite / pytest-backend (push) Successful in 43s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 13s
Test Suite / k6 /health Baseline (push) Successful in 33s
Test Suite / playwright-tests (push) Successful in 1m13s
- Added `semantic_brief_for_stage` function to enhance semantic briefs with stage learning goals for improved roadmap matching. - Introduced `exercise_passes_stage_learning_goal_gate` to validate exercises against stage learning goals, enhancing relevance checks. - Updated path retrieval and scoring logic to incorporate stage learning goals, allowing for more nuanced exercise selection. - Enhanced UI to indicate weak matches with stage learning goals, improving user feedback on exercise relevance. - Incremented application version to reflect these updates.
834 lines
27 KiB
Python
834 lines
27 KiB
Python
"""
|
|
Planungs-KI Phase E: Semantik-Schicht für Anfrage-Verständnis und Retrieval.
|
|
|
|
Trennt anfrage-spezifische Semantik (Technik, Phrasen, Entwicklungsbogen) vom
|
|
Katalog-Profil-Overlay (Fokus/Skills). Wird in Hybrid-Retrieval und Pfad-QA genutzt.
|
|
"""
|
|
from __future__ import annotations

import json
import logging
import re
from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple

from pydantic import BaseModel, Field, field_validator

from ai_prompt_runtime import AiPromptUnavailableError, load_and_render_ai_prompt
from exercise_ai import strip_html_to_plain
from openrouter_chat import (
    effective_openrouter_model_for_prompt_row,
    normalize_openrouter_env,
    openrouter_chat_completion,
)
|
|
|
|
# Module logger under a fixed, explicit logger name.
_logger = logging.getLogger("shinkan.planning_exercise_semantics")

# Skill names used for technique topics without a dedicated entry in
# _TECHNIQUE_EXPECTED_SKILLS.
_DEFAULT_TECHNIQUE_SKILLS: Tuple[str, ...] = ("Geri Waza", "Koordination", "Gleichgewicht")

# Kick techniques. Each entry is (technique, other techniques that become
# exclude phrases when this technique is the detected topic).
_GERI_TECHNIQUES: Tuple[Tuple[str, Tuple[str, ...]], ...] = (
    ("mae geri", ("mawashi geri", "yoko geri", "ushiro geri", "sakuto geri", "mikazuki geri")),
    ("mawashi geri", ("mae geri", "yoko geri", "ushiro geri", "sakuto geri")),
    ("yoko geri", ("mae geri", "mawashi geri", "ushiro geri", "sakuto geri")),
    ("ushiro geri", ("mae geri", "mawashi geri", "yoko geri", "sakuto geri")),
    ("sakuto geri", ("mae geri", "mawashi geri", "yoko geri", "mikazuki geri")),
    ("mikazuki geri", ("mae geri", "mawashi geri", "sakuto geri")),
)

# Non-kick techniques, same (technique, excludes) layout as above.
_OTHER_TECHNIQUE_PATTERNS: Tuple[Tuple[str, Tuple[str, ...]], ...] = (
    ("oi zuki", ("gyaku zuki", "age uke", "gedan barai")),
    ("gyaku zuki", ("oi zuki", "mae geri")),
    ("age uke", ("gedan barai", "soto uke")),
    ("gedan barai", ("age uke", "soto uke")),
)

# Expected skill names per technique topic. Only "mae geri" deviates from the
# default triple (it additionally expects "Kime").
_TECHNIQUE_EXPECTED_SKILLS: Dict[str, Tuple[str, ...]] = {
    "mae geri": (*_DEFAULT_TECHNIQUE_SKILLS, "Kime"),
    "mawashi geri": _DEFAULT_TECHNIQUE_SKILLS,
    "yoko geri": _DEFAULT_TECHNIQUE_SKILLS,
    "ushiro geri": _DEFAULT_TECHNIQUE_SKILLS,
    "sakuto geri": _DEFAULT_TECHNIQUE_SKILLS,
    "mikazuki geri": _DEFAULT_TECHNIQUE_SKILLS,
}
|
|
|
|
# Development-arc phases in progression order. Each entry is
# (phase name, substrings whose presence in a lower-cased text signals the phase).
_ARC_PHASES: Tuple[Tuple[str, Tuple[str, ...]], ...] = (
    ("einstieg", ("einstieg", "erlernen", "lernen", "anfänger", "anfaenger", "beginn", "grund")),
    ("grundlage", ("grundlage", "fundament", "basis", "basic")),
    ("vertiefung", ("vertief", "festigung", "übung", "uebung", "wiederhol")),
    ("anwendung", ("anwend", "partner", "kampf", "kumite", "reaktion")),
    ("perfektion", ("perfekt", "meisterschaft", "höchst", "hoechst", "kime", "sauber")),
)

# Query hint text per phase. Within this module only the keys are consulted
# (to decide whether a step phase is a known phase).
_PHASE_QUERY_HINTS: Dict[str, str] = dict(
    einstieg="einstieg grundübung einfach",
    grundlage="grundtechnik festigung",
    vertiefung="vertiefung technik übung",
    anwendung="anwendung partner variante",
    perfektion="perfektion kontrolle kime höchste stufe",
)
|
|
|
|
# German filler words ignored when extracting keywords from a query. Umlaut
# words are listed in both spellings (e.g. "für" / "fuer").
_QUERY_STOPWORDS = frozenset(
    (
        "von bis zur zum der die das des den dem "
        "ein eine einer eines und oder mit für fuer "
        "im in am an auf aus beim nach vor über ueber unter "
        "wie was wo wir soll sollen bitte schlage vorschlag "
        "übung uebung übungen uebungen"
    ).split()
)
|
|
|
|
|
|
class PlanningSemanticBrief(BaseModel):
    """Structured, query-specific understanding of a planning/search request.

    Instances are produced deterministically by ``build_semantic_brief`` and
    optionally enriched from an LLM answer by ``merge_semantic_brief_llm``.
    """

    # Main topic of the query; when set it is the single core phrase for matching.
    primary_topic: Optional[str] = Field(default=None, max_length=120)
    # One of "general", "technique", "focus", "method", "skill" (see _topic_type).
    topic_type: str = Field(default="general", max_length=40)
    # Phrases an exercise text should contain.
    must_phrases: List[str] = Field(default_factory=list)
    # Phrases that lower the score when found in an exercise text.
    exclude_phrases: List[str] = Field(default_factory=list)
    # Ordered phase names describing the requested development arc.
    development_arc: List[str] = Field(default_factory=list)
    # Query text handed to retrieval.
    retrieval_query: str = Field(default="", max_length=500)
    # Confidence in the semantic reading, within [0, 1].
    semantic_strength: float = Field(default=0.0, ge=0.0, le=1.0)
    # Optional free-text explanation (filled from LLM enrichment).
    rationale: Optional[str] = Field(default=None, max_length=400)

    @field_validator("topic_type")
    @classmethod
    def _topic_type(cls, v: str) -> str:
        """Lower-case the topic type and fall back to "general" for unknown values."""
        s = (v or "general").strip().lower()
        return s if s in {"general", "technique", "focus", "method", "skill"} else "general"

    @field_validator("must_phrases", "exclude_phrases", "development_arc", mode="before")
    @classmethod
    def _norm_phrase_list(cls, v: Any) -> List[str]:
        """Normalize raw input into at most 12 unique phrases of at most 120 chars.

        A bare string is treated as a one-element list. Phrases are truncated
        *before* the duplicate check so that two long phrases sharing their
        first 120 characters cannot both end up in the result.
        """
        if not v:
            return []
        if isinstance(v, str):
            v = [v]
        out: List[str] = []
        for item in v:
            # rstrip: truncation may cut right after an inner space.
            s = _normalize_phrase(str(item or ""))[:120].rstrip()
            if s and s not in out:
                out.append(s)
        return out[:12]
|
|
|
|
|
|
def _normalize_phrase(text: str) -> str:
    """Return *text* trimmed, lower-cased and with whitespace runs collapsed to one space.

    Falsy input (``None``, ``""``) yields ``""``.
    """
    cleaned = (text or "").strip().lower()
    return re.sub(r"\s+", " ", cleaned)
|
|
|
|
|
|
def _normalize_query(text: str) -> str:
    """Return *text* trimmed with whitespace runs collapsed; letter case is kept.

    Falsy input (``None``, ``""``) yields ``""``.
    """
    trimmed = (text or "").strip()
    return re.sub(r"\s+", " ", trimmed)
|
|
|
|
|
|
def _extract_json_object(text: str) -> Dict[str, Any]:
    """Parse the JSON object embedded in an LLM answer.

    A leading/trailing Markdown code fence is removed, then everything from
    the first ``{`` to the last ``}`` is decoded.

    Raises:
        ValueError: if no ``{...}`` span exists, the span is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError``), or the decoded
            value is not a dict.
    """
    payload = (text or "").strip()
    if payload.startswith("```"):
        payload = re.sub(r"^```[a-zA-Z0-9]*\s*", "", payload)
        if payload.endswith("```"):
            payload = payload[:-3].strip()

    first_brace = payload.find("{")
    last_brace = payload.rfind("}")
    if not (0 <= first_brace < last_brace):
        raise ValueError("Kein JSON-Objekt in LLM-Antwort")

    decoded = json.loads(payload[first_brace : last_brace + 1])
    if isinstance(decoded, dict):
        return decoded
    raise ValueError("LLM-Antwort ist kein JSON-Objekt")
|
|
|
|
|
|
def _find_technique_in_text(q_lower: str) -> Optional[Tuple[str, Tuple[str, ...]]]:
    """Return the first known technique contained in *q_lower*, with its exclude list.

    Kick techniques are checked before the other technique patterns; matching
    is a plain substring test. Returns ``None`` when nothing matches.
    """
    catalogue = _GERI_TECHNIQUES + _OTHER_TECHNIQUE_PATTERNS
    return next(
        ((name, rivals) for name, rivals in catalogue if name in q_lower),
        None,
    )
|
|
|
|
|
|
def _detect_development_arc(q_lower: str) -> List[str]:
    """Return the development-arc phases signalled by a lower-cased query.

    A phase is included when any of its markers from ``_ARC_PHASES`` occurs as
    a substring; the result keeps the order of ``_ARC_PHASES``. If no marker
    matches but the query contains both words "von" and "bis" (a "from ... to"
    span), the arc defaults to ``["einstieg", "perfektion"]``.
    """
    found: List[str] = list(
        dict.fromkeys(
            phase
            for phase, markers in _ARC_PHASES
            if any(marker in q_lower for marker in markers)
        )
    )
    # Whole-word test: a substring test would also fire on words that merely
    # contain the letters, e.g. "davon" or "bisher".
    if (
        not found
        and re.search(r"\bvon\b", q_lower)
        and re.search(r"\bbis\b", q_lower)
    ):
        found = ["einstieg", "perfektion"]
    return found
|
|
|
|
|
|
def _keyword_phrases_from_query(query: str) -> List[str]:
    """Extract up to six keyword phrases (bigrams and single words) from *query*.

    Words are runs of at least three letters from the lower-cased query. For
    each non-stopword, the bigram with the following word is emitted first
    (if that word is not a stopword and the bigram has at least 5 characters),
    then the word itself if it has at least 4 characters. Duplicates are skipped.
    """
    lowered = _normalize_query(query).lower()
    # The text is already lower-cased, so the tokens are too.
    words = re.findall(r"[a-zäöüß]{3,}", lowered, flags=re.IGNORECASE)

    collected: List[str] = []
    # Pair each word with its successor; the last word is paired with None.
    for current, following in zip(words, [*words[1:], None]):
        if current in _QUERY_STOPWORDS:
            continue
        if following is not None and following not in _QUERY_STOPWORDS:
            bigram = _normalize_phrase(f"{current} {following}")
            if len(bigram) >= 5 and bigram not in collected:
                collected.append(bigram)
        if len(current) >= 4 and current not in collected:
            collected.append(current)
    return collected[:6]
|
|
|
|
|
|
def build_semantic_brief(query: Optional[str]) -> PlanningSemanticBrief:
    """Build a brief from *query* deterministically, without any LLM call.

    An empty query yields an empty brief with strength 0. Otherwise the
    strength is 0.82 when a known technique is found, else 0.45 when a
    development arc is detected, else 0.4 for queries of at least 24
    characters, else 0.25. Only the detected technique becomes a must phrase;
    generic keywords are deliberately left out so they do not dilute scoring.
    """
    normalized = _normalize_query(query)
    if not normalized:
        return PlanningSemanticBrief(retrieval_query="", semantic_strength=0.0)

    lowered = normalized.lower()
    technique = _find_technique_in_text(lowered)
    phases = _detect_development_arc(lowered)

    if technique:
        topic, rivals = technique
        required: List[str] = [topic]
        excluded: List[str] = list(rivals)
        kind = "technique"
        confidence = 0.82
    else:
        topic = None
        required = []
        excluded = []
        kind = "general"
        if phases:
            confidence = 0.45
        elif len(normalized) >= 24:
            confidence = 0.4
        else:
            confidence = 0.25

    # Retrieval text: technique plus the first two phases; without either,
    # fall back to the normalized query itself.
    terms = [*required, *phases[:2]]
    if terms:
        retrieval = " ".join(dict.fromkeys(terms))[:500]
    else:
        retrieval = normalized

    return PlanningSemanticBrief(
        primary_topic=topic,
        topic_type=kind,
        must_phrases=required[:8],
        exclude_phrases=excluded[:10],
        development_arc=phases[:5],
        retrieval_query=retrieval[:500],
        semantic_strength=min(1.0, round(confidence, 3)),
        rationale=None,
    )
|
|
|
|
|
|
def merge_semantic_brief_llm(
    base: PlanningSemanticBrief,
    llm_obj: Mapping[str, Any],
) -> PlanningSemanticBrief:
    """Merge an LLM answer into the deterministic brief.

    * ``primary_topic``, ``topic_type`` and ``rationale`` are taken from the
      LLM when it supplies a truthy value.
    * Phrase lists are extended with new, normalized LLM entries (max 12).
    * ``semantic_strength`` can only be raised by the LLM, never lowered.
    * When must phrases exist, ``retrieval_query`` is rebuilt from the core
      phrases of the merged data.

    Raises:
        pydantic.ValidationError: if the merged data violates the model's
            constraints (e.g. an over-long ``primary_topic``).
    """
    data = base.model_dump()

    # NOTE(review): a truthy LLM primary_topic replaces a deterministically
    # detected technique — confirm this is intended.
    for key in ("primary_topic", "topic_type", "rationale"):
        val = llm_obj.get(key)
        if val:
            data[key] = val

    for key in ("must_phrases", "exclude_phrases", "development_arc"):
        extra = llm_obj.get(key) or []
        if isinstance(extra, str):
            # A bare string is one phrase; iterating it would add single characters.
            extra = [extra]
        elif not isinstance(extra, (list, tuple)):
            # Ignore malformed values instead of failing the whole enrichment.
            extra = []
        merged = list(data.get(key) or [])
        for item in extra:
            s = _normalize_phrase(str(item or ""))
            if s and s not in merged:
                merged.append(s)
        data[key] = merged[:12]

    llm_strength = llm_obj.get("semantic_strength")
    if llm_strength is not None:
        try:
            data["semantic_strength"] = min(
                1.0,
                max(float(data["semantic_strength"]), float(llm_strength)),
            )
        except (TypeError, ValueError):
            # Non-numeric strength from the LLM: keep the deterministic value.
            pass

    if data.get("must_phrases"):
        core = semantic_core_phrases(PlanningSemanticBrief.model_validate(data))
        data["retrieval_query"] = " ".join(core[:4])[:500] if core else data.get("retrieval_query", "")

    out = PlanningSemanticBrief.model_validate(data)
    # A brief with a concrete topic is never left as "general".
    if out.primary_topic and out.topic_type == "general":
        out = out.model_copy(update={"topic_type": "technique"})
    return out
|
|
|
|
|
|
def try_enrich_semantic_brief_with_llm(
    cur,
    query: str,
    base: PlanningSemanticBrief,
) -> Tuple[PlanningSemanticBrief, bool]:
    """Try to enrich *base* through the "planning_exercise_query_semantics" prompt.

    Returns ``(brief, used_llm)``. *base* is returned unchanged with ``False``
    when no API key is configured, the base strength is below 0.35, the query
    is blank, the prompt is unavailable, or any step of the LLM round trip
    fails (failures other than an unavailable prompt are logged as a warning).
    """
    api_key, _ = normalize_openrouter_env()
    search_text = (query or "").strip()
    if not api_key or base.semantic_strength < 0.35 or not search_text:
        return base, False

    prompt_vars = {
        "search_query": search_text,
        "semantic_brief_json": json.dumps(brief_to_summary_dict(base), ensure_ascii=False),
    }

    try:
        prompt_row, rendered = load_and_render_ai_prompt(
            cur, "planning_exercise_query_semantics", prompt_vars
        )
        answer = openrouter_chat_completion(
            api_key=api_key,
            model=effective_openrouter_model_for_prompt_row(prompt_row),
            user_content=rendered.text,
        )
        enriched = merge_semantic_brief_llm(base, _extract_json_object(answer))
        return enriched, True
    except AiPromptUnavailableError:
        # Missing prompt is an expected state, not worth a warning.
        return base, False
    except Exception as exc:
        # Best effort: enrichment must never break the deterministic path.
        _logger.warning("Semantik-LLM fehlgeschlagen: %s", exc)
        return base, False
|
|
|
|
|
|
def brief_to_summary_dict(brief: PlanningSemanticBrief) -> Dict[str, Any]:
    """Return the brief's fields as a plain dict, with list fields copied."""
    field_names = (
        "primary_topic",
        "topic_type",
        "must_phrases",
        "exclude_phrases",
        "development_arc",
        "retrieval_query",
        "semantic_strength",
        "rationale",
    )
    summary: Dict[str, Any] = {name: getattr(brief, name) for name in field_names}
    # Copy the sequences so callers cannot mutate the brief through the summary.
    for name in ("must_phrases", "exclude_phrases", "development_arc"):
        summary[name] = list(summary[name])
    return summary
|
|
|
|
|
|
def step_phase_for_index(brief: PlanningSemanticBrief, step_index: int, max_steps: int) -> Optional[str]:
    """Map a step position onto a phase of the brief's development arc.

    Without an explicit arc, the first ``max_steps`` phases of the default
    five-phase arc are used, but only for paths longer than one step and a
    semantic strength of at least 0.5; otherwise ``None`` is returned. The
    step's relative position within the path selects the phase.
    """
    phases = list(brief.development_arc or [])
    if not phases:
        if max_steps <= 1 or not brief.semantic_strength >= 0.5:
            return None
        phases = ["einstieg", "grundlage", "vertiefung", "anwendung", "perfektion"][:max_steps]

    if len(phases) == 1:
        return phases[0]

    last = len(phases) - 1
    # Relative position of the step in the path (0.0 for the first step).
    fraction = step_index / max(max_steps - 1, 1)
    return phases[min(last, int(round(fraction * last)))]
|
|
|
|
|
|
def step_retrieval_query(
    brief: PlanningSemanticBrief,
    goal_query: str,
    step_index: int,
    max_steps: int,
) -> str:
    """Build the retrieval query for one step of a path.

    The query is the brief's primary topic (or, without one, the first word of
    its retrieval query) followed by the step's phase from
    ``step_phase_for_index``. If that yields nothing, the normalized
    *goal_query* is returned.
    """
    phase = step_phase_for_index(brief, step_index, max_steps)

    parts: List[str] = []
    if brief.primary_topic:
        parts.append(brief.primary_topic)
    else:
        # split() is empty for a blank retrieval query; guard against indexing it.
        words = (brief.retrieval_query or "").split()
        if words:
            parts.append(words[0])
    if phase:
        parts.append(phase)

    return _normalize_query(" ".join(parts)) or _normalize_query(goal_query)
|
|
|
|
|
|
def apply_dynamic_retrieval_weights(
    base_weights: Mapping[str, float],
    brief: PlanningSemanticBrief,
    *,
    scenario: str,
    has_planning_reference: bool,
) -> Dict[str, float]:
    """Balance the semantic channel against the other retrieval channels.

    Returns a new dict; *base_weights* is not modified. With a semantic
    strength of at most 0.05 only a default ``"semantic": 0.0`` is added.
    Otherwise the semantic weight grows with the strength and the fulltext
    (and, for query-driven retrieval, profile) weight shrinks. If the positive
    weights, ignoring the repeat penalties, sum to more than 0.92 they are
    scaled down to a sum of 0.88.
    """
    weights: Dict[str, float] = dict(base_weights)
    strength = float(brief.semantic_strength or 0.0)
    if strength <= 0.05:
        weights.setdefault("semantic", 0.0)
        return weights

    # Free search, or no plan to lean on: the query text carries the intent.
    query_driven = scenario == "free_search" or not has_planning_reference
    if query_driven:
        weights["semantic"] = round(0.12 + strength * 0.38, 4)
        weights["fulltext"] = round(
            float(weights.get("fulltext", 0.18)) * (1.0 - strength * 0.35), 4
        )
        weights["profile"] = round(
            float(weights.get("profile", 0.22)) * (1.0 - strength * 0.25), 4
        )
    else:
        weights["semantic"] = round(0.12 + strength * 0.22, 4)
        weights["fulltext"] = round(
            float(weights.get("fulltext", 0.18)) * (1.0 - strength * 0.15), 4
        )

    # Repeat penalties are excluded from the normalization.
    penalty_keys = {"repeat_unit", "repeat_group"}
    positive_keys = [
        key for key, value in weights.items() if key not in penalty_keys and value > 0
    ]
    total = sum(weights[key] for key in positive_keys)
    if total > 0.92:
        factor = 0.88 / total
        for key in positive_keys:
            weights[key] = round(weights[key] * factor, 4)
    return weights
|
|
|
|
|
|
def _blob_from_fields(
    title: str,
    summary: str,
    goal: str,
    variant_names: Sequence[str],
) -> str:
    """Join an exercise's text fields into one lower-cased search blob.

    *summary* and *goal* are passed through ``strip_html_to_plain`` (limited
    to 600 and 800 characters); empty pieces are dropped before joining.
    """
    pieces = [
        title or "",
        strip_html_to_plain(summary, max_len=600),
        strip_html_to_plain(goal, max_len=800),
        *(variant_names or []),
    ]
    return " ".join(piece for piece in pieces if piece).lower()
|
|
|
|
|
|
def _compact_alpha(text: str) -> str:
    """Lower-case *text* and drop everything except a-z, digits and ä/ö/ü/ß."""
    lowered = (text or "").lower()
    return re.sub(r"[^a-z0-9äöüß]+", "", lowered)
|
|
|
|
|
|
def _phrase_in_blob(phrase: str, blob: str) -> bool:
    """Tell whether *phrase* occurs in *blob*, ignoring case and separators.

    Matches either as a direct substring of the lower-cased blob or, after
    removing everything but letters and digits from both sides, as a
    substring of the compacted blob (so "mae geri" also matches "Mae-Geri").
    An empty phrase or blob never matches.
    """
    needle = _normalize_phrase(phrase)
    if not needle or not blob:
        return False

    haystack = blob.lower()
    if needle in haystack:
        return True

    # A whole-word regex on the same text cannot succeed once the plain
    # substring test failed, so only the compacted comparison remains.
    compact_needle = _compact_alpha(needle)
    return bool(compact_needle) and compact_needle in _compact_alpha(haystack)
|
|
|
|
|
|
def score_exercise_semantic_relevance(
    *,
    title: str,
    summary: str,
    goal: str,
    variant_names: Sequence[str],
    brief: PlanningSemanticBrief,
    step_phase: Optional[str] = None,
) -> Tuple[float, List[str]]:
    """Score how well an exercise's text matches the semantic brief.

    Returns ``(score, reasons)``: the score is clamped to [0, 1] and rounded
    to 4 decimals, and at most four reason strings are returned. A brief with
    strength of at most 0.05, or an exercise without text, scores
    ``(0.0, [])``.
    """
    if brief.semantic_strength <= 0.05:
        return 0.0, []

    text = _blob_from_fields(title, summary, goal, variant_names)
    if not text.strip():
        return 0.0, []

    def count_hits(phrases: Sequence[str]) -> int:
        return sum(1 for phrase in phrases if _phrase_in_blob(phrase, text))

    required = list(brief.must_phrases or [])
    core = semantic_core_phrases(brief)
    core_hits = count_hits(core)
    must_hits = count_hits(required)
    exclude_hits = count_hits(list(brief.exclude_phrases or []))

    notes: List[str] = []
    total = 0.0

    # Core topic carries most of the score.
    if core:
        total += 0.62 * (core_hits / len(core))
        if core_hits == len(core):
            notes.append("Kern-Thema der Anfrage im Übungstext")
        elif core_hits > 0:
            notes.append("Teilweise passend zum Kern-Thema")
    elif brief.primary_topic and _phrase_in_blob(brief.primary_topic, text):
        total += 0.55
        notes.append(f"Thema „{brief.primary_topic}“ im Übungstext")

    # Small bonus for must phrases, only when they differ from the core phrases.
    if required and core != required:
        total += 0.12 * (must_hits / len(required))

    # Excluded side topics cost much more when the main topic is absent.
    if exclude_hits > 0:
        topic_present = bool(core_hits) or (
            brief.primary_topic and _phrase_in_blob(brief.primary_topic, text)
        )
        if topic_present:
            total -= min(0.12, 0.06 * exclude_hits)
        else:
            total -= min(0.65, 0.22 * exclude_hits)
            notes.append("Enthält ausgeschlossene Nebenthemen")

    # Bonus when the text mentions the phase of the current path step.
    if step_phase and step_phase in _PHASE_QUERY_HINTS:
        markers: Tuple[str, ...] = ()
        for name, candidates in _ARC_PHASES:
            if name == step_phase:
                markers = candidates
                break
        if step_phase in text or any(marker in text for marker in markers):
            total += 0.12
            notes.append(f"Passt zur Pfad-Phase „{step_phase}“")

    # Without a concrete step phase, reward any arc phase named in the text.
    if brief.development_arc and not step_phase:
        arc_hits = sum(1 for phase in brief.development_arc if phase in text)
        if arc_hits:
            total += min(0.15, 0.05 * arc_hits)

    clamped = max(0.0, min(1.0, round(total, 4)))
    return clamped, notes[:4]
|
|
|
|
|
|
def semantic_core_phrases(brief: PlanningSemanticBrief) -> List[str]:
    """Return the hard core phrases used for matching.

    With a primary topic, that topic (normalized) is the only core phrase.
    Otherwise the first two must phrases are used, normalized and with empty
    entries dropped.
    """
    if brief.primary_topic:
        return [_normalize_phrase(brief.primary_topic)]

    phrases: List[str] = []
    for raw in (brief.must_phrases or [])[:2]:
        if not raw:
            continue
        normalized = _normalize_phrase(raw)
        if normalized:
            phrases.append(normalized)
    return phrases
|
|
|
|
|
|
def resolve_semantic_skill_weights(cur, brief: PlanningSemanticBrief) -> Dict[int, float]:
    """Resolve the skills expected for the brief's technique topic to skill ids.

    Skill names come from ``_TECHNIQUE_EXPECTED_SKILLS`` for a known topic, or
    from ``_DEFAULT_TECHNIQUE_SKILLS`` for any other technique brief or a
    topic containing "geri"; other briefs yield ``{}``. Each name is looked up
    in the ``skills`` table (exact match first, then prefix, then substring;
    shortest name wins) and every skill found gets weight 1.0.

    *cur* is a database cursor whose rows support access by column name.
    """
    topic = _normalize_phrase(brief.primary_topic or "")
    if topic in _TECHNIQUE_EXPECTED_SKILLS:
        expected = list(_TECHNIQUE_EXPECTED_SKILLS[topic])
    elif brief.topic_type == "technique" or "geri" in topic:
        expected = list(_DEFAULT_TECHNIQUE_SKILLS)
    else:
        return {}

    resolved: Dict[int, float] = {}
    for skill_name in expected[:6]:
        needle = skill_name.lower()
        cur.execute(
            """
            SELECT id, name FROM skills
            WHERE (status IS NULL OR status = 'active')
            AND LOWER(name) LIKE %s
            ORDER BY CASE WHEN LOWER(name) = %s THEN 0 WHEN LOWER(name) LIKE %s THEN 1 ELSE 2 END,
            LENGTH(name) ASC
            LIMIT 1
            """,
            (f"%{needle}%", needle, f"{needle}%"),
        )
        match = cur.fetchone()
        if not match:
            continue
        skill_id = int(match["id"])
        resolved[skill_id] = max(resolved.get(skill_id, 0.0), 1.0)
    return resolved
|
|
|
|
|
|
def enrich_target_with_semantic_expectations(
    target,
    *,
    skill_weights: Dict[int, float],
):
    """Return a target profile with *skill_weights* merged into its skill weights.

    With empty *skill_weights* the given *target* is returned as is. Otherwise
    a new ``PlanningTargetProfile`` is built: the merged skill weights are
    normalized, all other weight maps are copied, and the source
    "semantic_expectation" is appended to the sources if not yet present.
    """
    # Imported inside the function rather than at module level.
    from planning_exercise_profiles import PlanningTargetProfile, _merge_weight_maps, _normalize_weight_map

    if not skill_weights:
        return target

    combined = _merge_weight_maps(dict(target.skill_weights), skill_weights, scale=1.0)
    merged = _normalize_weight_map(combined)

    sources = [*target.sources]
    if "semantic_expectation" not in sources:
        sources.append("semantic_expectation")

    carried_over = {
        name: dict(getattr(target, name))
        for name in (
            "focus_area_ids",
            "style_direction_ids",
            "training_type_ids",
            "target_group_ids",
            "skill_gap_weights",
            "skill_plan_weights",
        )
    }
    return PlanningTargetProfile(skill_weights=merged, sources=sources, **carried_over)
|
|
|
|
|
|
def apply_path_retrieval_weights(brief: PlanningSemanticBrief) -> Dict[str, float]:
    """Return the retrieval weights for the path builder.

    Three fixed weight sets are selected by semantic strength (>= 0.65,
    >= 0.35, below); the stronger the semantics, the more the semantic
    channel dominates and the harsher the repeat penalty for a unit.
    """
    strength = float(brief.semantic_strength or 0.0)

    # (minimum strength, weights), strongest tier first.
    tiers = (
        (
            0.65,
            {
                "semantic": 0.50,
                "fulltext": 0.16,
                "profile": 0.26,
                "progression": 0.04,
                "skill": 0.04,
                "plan": 0.0,
                "repeat_unit": -0.40,
                "repeat_group": -0.15,
            },
        ),
        (
            0.35,
            {
                "semantic": 0.38,
                "fulltext": 0.18,
                "profile": 0.28,
                "progression": 0.06,
                "skill": 0.06,
                "plan": 0.04,
                "repeat_unit": -0.35,
                "repeat_group": -0.15,
            },
        ),
    )
    for minimum, weights in tiers:
        if strength >= minimum:
            return weights

    return {
        "semantic": 0.22,
        "fulltext": 0.22,
        "profile": 0.28,
        "progression": 0.10,
        "skill": 0.10,
        "plan": 0.08,
        "repeat_unit": -0.30,
        "repeat_group": -0.15,
    }
|
|
|
|
|
|
# Words ignored when matching a stage learning goal against exercise text:
# the query stopwords plus phase names and generic training vocabulary.
_STAGE_GOAL_STOPWORDS = _QUERY_STOPWORDS | frozenset(
    (
        "stufe phase lernziel "
        "grundlage vertiefung anwendung perfektion einstieg "
        "sicher sauber korrekt technik training"
    ).split()
)
|
|
|
|
|
|
def _significant_stage_tokens(learning_goal: str) -> List[str]:
    """Return up to ten distinct content words of a stage learning goal.

    Words are runs of at least four letters. Each word is lower-cased and its
    umlauts are transliterated (ä→ae, ö→oe, ü→ue) before stopwords are removed;
    first-occurrence order is kept.
    """
    words = re.findall(r"[a-zäöüß]{4,}", _normalize_phrase(learning_goal), flags=re.IGNORECASE)
    folded = (
        word.lower().replace("ä", "ae").replace("ö", "oe").replace("ü", "ue")
        for word in words
    )
    # dict.fromkeys removes duplicates while preserving order.
    distinct = dict.fromkeys(
        token for token in folded if token not in _STAGE_GOAL_STOPWORDS
    )
    return list(distinct)[:10]
|
|
|
|
|
|
def semantic_brief_for_stage(
    brief: PlanningSemanticBrief,
    *,
    learning_goal: str,
    phase: Optional[str] = None,
) -> PlanningSemanticBrief:
    """Extend *brief* with a stage's learning goal for per-stage roadmap matching.

    The normalized learning goal (cut to 120 characters) is put first in the
    must phrases, *phase* is put first in the development arc, and the
    semantic strength is raised to at least 0.58. An empty learning goal
    returns *brief* unchanged. The copy is made with ``model_copy(update=...)``,
    which does not re-run the model's validators.
    """
    lg = _normalize_phrase(learning_goal)
    if not lg:
        return brief

    # Compare the stored (truncated) form, so a long goal is not inserted
    # again when an already extended brief is extended a second time.
    goal_phrase = lg[:120]
    must = list(brief.must_phrases or [])
    if goal_phrase not in must:
        must.insert(0, goal_phrase)

    arc = list(brief.development_arc or [])
    ph = (phase or "").strip().lower()
    if ph and ph not in arc:
        arc = [ph, *arc]

    strength = max(float(brief.semantic_strength or 0.0), 0.58)
    return brief.model_copy(
        update={
            "must_phrases": must[:12],
            "development_arc": arc[:8],
            "semantic_strength": min(1.0, strength),
        }
    )
|
|
|
|
|
|
def exercise_passes_stage_learning_goal_gate(
    *,
    learning_goal: str,
    title: str,
    summary: str = "",
    goal: str = "",
    semantic_score: float = 0.0,
    min_semantic: float = 0.20,
    relaxed: bool = False,
) -> bool:
    """Decide whether an exercise fits a roadmap stage's learning goal.

    The exercise passes when
      * the learning goal has fewer than 3 characters (nothing to check), or
      * the whole goal occurs in the exercise text, or
      * enough significant goal words occur in the text (1 of up to 2 words,
        otherwise at least 2 and at least half of them), or
      * *semantic_score* reaches the threshold: *min_semantic*, or in relaxed
        mode 0.14 (0.12 when the goal has no significant words).
    """
    lg = (learning_goal or "").strip()
    if len(lg) < 3:
        return True

    blob = _blob_from_fields(title, summary, goal, [])
    if _phrase_in_blob(_normalize_phrase(lg), blob):
        return True

    tokens = _significant_stage_tokens(lg)
    if not tokens:
        threshold = 0.12 if relaxed else min_semantic
        return semantic_score >= threshold

    # The tokens are umlaut-folded (ä→ae, ö→oe, ü→ue); fold the text the same
    # way, otherwise a goal word with an umlaut never matches text that
    # spells it with the umlaut.
    folded_blob = blob.replace("ä", "ae").replace("ö", "oe").replace("ü", "ue")
    hits = sum(1 for token in tokens if _phrase_in_blob(token, folded_blob))

    if len(tokens) <= 2:
        required = 1
    else:
        required = max(2, (len(tokens) + 1) // 2)
    if hits >= required:
        return True

    threshold = 0.14 if relaxed else min_semantic
    return semantic_score >= threshold
|
|
|
|
|
|
def exercise_passes_path_semantic_gate(
    *,
    semantic_score: float,
    title: str,
    brief: PlanningSemanticBrief,
    summary: str = "",
    goal: str = "",
    strict: bool = True,
) -> bool:
    """Decide whether an exercise is semantically close enough for a path step.

    Briefs with a semantic strength below 0.55 do not gate at all. Otherwise
    the exercise passes when its score reaches 0.18 (strict) or 0.06
    (non-strict), or when the primary topic occurs in its text. In non-strict
    mode a multi-word topic also passes when every single word occurs in the
    text (the technique is often spread over the body text, not the title).
    """
    if brief.semantic_strength < 0.55:
        return True

    min_score = 0.18 if strict else 0.06
    if semantic_score >= min_score:
        return True

    topic = brief.primary_topic or ""
    if not topic:
        return False

    blob = _blob_from_fields(title, summary, goal, [])
    if _phrase_in_blob(topic, blob):
        return True

    if not strict:
        words = topic.split()
        if len(words) >= 2 and all(_phrase_in_blob(word, blob) for word in words):
            return True

    return False
|
|
|
|
|
|
def pick_best_path_hit(
    hits: List[Dict[str, Any]],
    used_exercise_ids: Set[int],
    *,
    semantic_brief: Optional[PlanningSemanticBrief] = None,
    stage_learning_goal: Optional[str] = None,
    roadmap_stage_match: bool = False,
) -> Optional[Dict[str, Any]]:
    """Pick the best unused hit in tiers: strict, relaxed, then emergency fallback.

    Hits whose ``id`` is in *used_exercise_ids* are never chosen. Within a
    tier the hit with the highest ``(semantic_score, score)`` wins; on ties
    the earliest hit is kept.

    1. Strict: the hit must pass the path semantic gate (if a brief is given)
       and the stage learning-goal gate (if a goal is given).
    2. Relaxed: the same gates in non-strict / relaxed mode.
    3. Fallback, skipped when *roadmap_stage_match* is true: the best
       remaining hit; a hit without a positive semantic score must contain
       the brief's primary topic in its title or summary.

    Returns ``None`` when no hit qualifies.

    Raises:
        KeyError: if a hit has no ``"id"`` entry.
    """
    if not hits:
        return None

    stage_goal = (stage_learning_goal or "").strip()
    unused = [hit for hit in hits if int(hit["id"]) not in used_exercise_ids]

    def _semantic(hit: Dict[str, Any]) -> float:
        return float(hit.get("semantic_score") or 0.0)

    def _best(candidates) -> Optional[Dict[str, Any]]:
        # Strict ">" keeps the earliest of equally ranked hits.
        winner: Optional[Dict[str, Any]] = None
        winner_key: Tuple[float, float] = (-1.0, -1.0)
        for hit in candidates:
            key = (_semantic(hit), float(hit.get("score") or 0.0))
            if key > winner_key:
                winner, winner_key = hit, key
        return winner

    def _passes_gates(hit: Dict[str, Any], *, strict: bool) -> bool:
        sem = _semantic(hit)
        title = str(hit.get("title") or "")
        summary = str(hit.get("summary") or "")
        if semantic_brief and not exercise_passes_path_semantic_gate(
            semantic_score=sem,
            title=title,
            summary=summary,
            goal="",
            brief=semantic_brief,
            strict=strict,
        ):
            return False
        if stage_goal and not exercise_passes_stage_learning_goal_gate(
            learning_goal=stage_goal,
            title=title,
            summary=summary,
            semantic_score=sem,
            relaxed=not strict,
        ):
            return False
        return True

    for strict in (True, False):
        chosen = _best(hit for hit in unused if _passes_gates(hit, strict=strict))
        if chosen is not None:
            return chosen

    # Roadmap stages prefer no exercise over an unrelated one.
    if roadmap_stage_match:
        return None

    def _fallback_ok(hit: Dict[str, Any]) -> bool:
        if _semantic(hit) <= 0 and semantic_brief and semantic_brief.primary_topic:
            text = (str(hit.get("title") or "") + " " + str(hit.get("summary") or "")).lower()
            return _phrase_in_blob(semantic_brief.primary_topic, text)
        return True

    # Emergency (retrieval-first / bridge steps only): best remaining hit.
    return _best(hit for hit in unused if _fallback_ok(hit))
|
|
|
|
|
|
# Public API of this module, in alphabetical order.
__all__ = [
    "PlanningSemanticBrief",
    "apply_dynamic_retrieval_weights",
    "apply_path_retrieval_weights",
    "brief_to_summary_dict",
    "build_semantic_brief",
    "enrich_target_with_semantic_expectations",
    "exercise_passes_path_semantic_gate",
    "exercise_passes_stage_learning_goal_gate",
    "merge_semantic_brief_llm",
    "pick_best_path_hit",
    "resolve_semantic_skill_weights",
    "score_exercise_semantic_relevance",
    "semantic_brief_for_stage",
    "semantic_core_phrases",
    "step_phase_for_index",
    "step_retrieval_query",
    "try_enrich_semantic_brief_with_llm",
]
|