shinkan-jinkendo/backend/planning_exercise_text_signals.py
Lars a0a891e550
Some checks failed
Test Suite / playwright-tests (push) Waiting to run
Deploy Development / deploy (push) Successful in 42s
Test Suite / pytest-backend (push) Successful in 41s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 13s
Test Suite / k6 /health Baseline (push) Has been cancelled
Implement Phase B Enhancements for Planning Exercise Profiles
- Added support for section guidance notes and titles in the planning target profile, enabling richer context for exercise suggestions.
- Introduced deterministic text-to-catalog signal mapping, allowing for improved integration of planning text signals into the exercise retrieval process.
- Implemented a partner-related filter in exercise retrieval, enhancing the relevance of suggested exercises based on user intent.
- Updated the retrieval phase to account for text signals, improving the accuracy of exercise recommendations.
- Incremented version to 0.8.181 and updated changelog to reflect these significant enhancements in planning AI capabilities.
2026-05-23 10:26:03 +02:00

202 lines
5.9 KiB
Python

"""
Phase B: Deterministische Text→Katalog-Signale für PlanningTargetProfile.
Mappt Abschnitts-guidance, Rahmen-Ziele/-Notizen und Programmbeschreibung
auf Skill-/Katalog-Gewichte (ohne LLM).
"""
from __future__ import annotations
import re
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple
# Skill names shorter than this (after stripping) are ignored when skills are
# loaded and again when they are matched against the text.
_MIN_SKILL_NAME_LEN = 3
# Default cap on the number of distinct skills one text match may return.
_MAX_SKILL_MATCHES = 12
# Default cap on the number of distinct entries one catalog text match may return.
_MAX_CATALOG_MATCHES = 6
def _normalize_text_blob(*parts: Optional[str]) -> str:
    """Join the non-blank parts into one lowercase, newline-separated string.

    ``None`` and whitespace-only parts are dropped; every kept part is
    stripped of surrounding whitespace before joining.

    Args:
        *parts: Text fragments, any of which may be ``None`` or empty.

    Returns:
        The lowercased blob, or ``""`` when no part carries text.
    """
    stripped = ((part or "").strip() for part in parts)
    return "\n".join(piece for piece in stripped if piece).lower()
def _load_skills_for_text_match(cur) -> List[Tuple[int, str, int]]:
    """Load matchable skills as ``(id, lowercase name, name length)`` tuples.

    The query keeps skills whose status is NULL or ``'active'`` and whose name
    is non-blank, ordered longest name first (ties alphabetical). Rows whose
    stripped name is shorter than ``_MIN_SKILL_NAME_LEN`` are skipped.

    Args:
        cur: DB cursor; fetched rows must support ``row.get("name")`` and
            ``row["id"]``.

    Returns:
        One tuple per kept skill, in query order. The length is that of the
        stripped name before lowercasing.
    """
    cur.execute(
        "\n"
        "SELECT id, name FROM skills\n"
        "WHERE (status IS NULL OR status = 'active')\n"
        "AND name IS NOT NULL AND TRIM(name) <> ''\n"
        "ORDER BY LENGTH(name) DESC, name ASC\n"
    )
    # Pair every row with its stripped name lazily so each row is processed
    # completely before the next one is touched.
    named_rows = (
        (record, str(record.get("name") or "").strip()) for record in cur.fetchall()
    )
    return [
        (int(record["id"]), skill_name.lower(), len(skill_name))
        for record, skill_name in named_rows
        if len(skill_name) >= _MIN_SKILL_NAME_LEN
    ]
def _load_catalog_names(cur, table: str, id_col: str = "id", name_col: str = "name") -> List[Tuple[int, str, int]]:
    """Load a catalog table as ``(id, lowercase name, name length)`` tuples.

    Rows with a NULL/blank name are excluded by the query, which orders the
    result longest name first (ties alphabetical). Rows whose stripped name
    has fewer than 2 characters are skipped afterwards.

    NOTE: ``table``, ``id_col`` and ``name_col`` are interpolated into the SQL
    text as-is (identifiers cannot be bound as parameters), so they must only
    ever come from trusted, hard-coded values.

    Args:
        cur: DB cursor; fetched rows must support ``row.get("name")`` and
            ``row["id"]``.
        table: Name of the catalog table to read.
        id_col: Column selected as ``id``.
        name_col: Column selected as ``name``.

    Returns:
        One tuple per kept row, in query order. The length is that of the
        stripped name before lowercasing.
    """
    cur.execute(
        "\n"
        f"SELECT {id_col} AS id, {name_col} AS name\n"
        f"FROM {table}\n"
        f"WHERE {name_col} IS NOT NULL AND TRIM({name_col}) <> ''\n"
        f"ORDER BY LENGTH({name_col}) DESC, {name_col} ASC\n"
    )
    entries: List[Tuple[int, str, int]] = []
    for record in cur.fetchall():
        label = str(record.get("name") or "").strip()
        if len(label) >= 2:
            entries.append((int(record["id"]), label.lower(), len(label)))
    return entries
def _match_catalog_names_in_text(
    text: str,
    catalog_rows: Sequence[Tuple[int, str, int]],
    *,
    weight: float = 0.85,
    limit: int = _MAX_CATALOG_MATCHES,
) -> Dict[int, float]:
    """Assign ``weight`` to every catalog entry whose name occurs in ``text``.

    Matching is plain substring containment, so both ``text`` and the names in
    ``catalog_rows`` are expected to be lowercased already. Rows are examined
    in the given order and scanning stops as soon as ``limit`` distinct ids
    have been collected.

    Args:
        text: Normalized text to search in.
        catalog_rows: ``(id, lowercase name, name length)`` tuples.
        weight: Weight recorded for each matching id.
        limit: Maximum number of distinct ids to return.

    Returns:
        Mapping of matched id to weight; empty when ``text`` or
        ``catalog_rows`` is empty.
    """
    matches: Dict[int, float] = {}
    if text and catalog_rows:
        for entry_id, lowered_name, _length in catalog_rows:
            if len(matches) >= limit:
                break
            # Names under two characters are never matched.
            if len(lowered_name) >= 2 and lowered_name in text:
                matches[entry_id] = max(matches.get(entry_id, 0.0), weight)
    return matches
def _match_skills_in_text(
    text: str,
    skill_rows: Sequence[Tuple[int, str, int]],
    *,
    limit: int = _MAX_SKILL_MATCHES,
) -> Dict[int, float]:
    """Weight every skill whose name occurs as a substring of ``text``.

    Rows are examined in the given order and scanning stops once ``limit``
    distinct skill ids have been collected. Rows whose recorded name length is
    below ``_MIN_SKILL_NAME_LEN`` are ignored.

    Args:
        text: Normalized (lowercased) text to search in.
        skill_rows: ``(id, lowercase name, name length)`` tuples.
        limit: Maximum number of distinct skill ids to return.

    Returns:
        Mapping of skill id to weight; empty when ``text`` or ``skill_rows``
        is empty.
    """
    if not (text and skill_rows):
        return {}
    weights: Dict[int, float] = {}
    for skill_id, lowered_name, length in skill_rows:
        if len(weights) >= limit:
            break
        if length < _MIN_SKILL_NAME_LEN or lowered_name not in text:
            continue
        # Longer names score higher: 0.72 base plus 0.012 per character,
        # counting at most 20 characters and never exceeding 1.0.
        score = min(1.0, 0.72 + min(length, 20) * 0.012)
        weights[skill_id] = max(weights.get(skill_id, 0.0), score)
    return weights
def load_framework_planning_text_parts(
    cur,
    framework_program_id: int,
    *,
    slot_id: Optional[int] = None,
) -> List[str]:
    """Collect the framework texts that feed text-signal matching.

    Gathers, in this order: the program description, then title and notes of
    every goal of the program (ordered by ``sort_order``), and, when
    ``slot_id`` is truthy, title and notes of that slot. Missing or blank
    values are skipped; kept values are stripped of surrounding whitespace.

    Args:
        cur: DB cursor; fetched rows must support ``row.get(column)``.
        framework_program_id: Program id, coerced with ``int()``.
        slot_id: Optional slot id, coerced with ``int()``. Falsy values
            (``None``, ``0``) skip the slot lookup.

    Returns:
        The collected text fragments with their original casing.
    """
    program_id = int(framework_program_id)
    fragments: List[str] = []

    def add_title_and_notes(record) -> None:
        # Goals and slots expose the same two text columns.
        title = (record.get("title") or "").strip()
        notes = (record.get("notes") or "").strip()
        fragments.extend(value for value in (title, notes) if value)

    cur.execute(
        "SELECT description FROM training_framework_programs WHERE id = %s",
        (program_id,),
    )
    program = cur.fetchone()
    if program:
        if (program.get("description") or "").strip():
            fragments.append(str(program["description"]).strip())

    cur.execute(
        "\n"
        "SELECT title, notes FROM training_framework_goals\n"
        "WHERE framework_program_id = %s\n"
        "ORDER BY sort_order ASC\n",
        (program_id,),
    )
    for goal in cur.fetchall():
        add_title_and_notes(goal)

    if slot_id:
        cur.execute(
            "SELECT title, notes FROM training_framework_slots WHERE id = %s",
            (int(slot_id),),
        )
        slot = cur.fetchone()
        if slot:
            add_title_and_notes(slot)

    return fragments
def resolve_planning_text_to_catalog_weights(
    cur,
    text_blob: str,
) -> Tuple[Dict[int, float], Dict[int, float], Dict[int, float], Dict[int, float], Dict[int, float]]:
    """Map free planning text to catalog and skill weight maps.

    The text is normalized (stripped, lowercased) and then searched for the
    names of skills, focus areas, style directions, training types and target
    groups. Text shorter than 3 characters after normalization yields five
    empty maps without touching the database.

    When the text contains one of the whole words "partner", "paar",
    "paarweise" or "zu zweit", the first loaded target group whose name
    contains "partner" or "paar" is raised to a weight of at least 0.9.

    Args:
        cur: DB cursor handed to the catalog/skill loaders.
        text_blob: Raw planning text.

    Returns:
        ``(focus, style, training_type, target_group, skill)`` weight maps,
        each keyed by the respective id.
    """
    text = _normalize_text_blob(text_blob)
    if len(text) < 3:
        return {}, {}, {}, {}, {}

    # Load order is kept stable: skills first, then the four catalogs.
    skill_rows = _load_skills_for_text_match(cur)
    catalog_rows = {
        table: _load_catalog_names(cur, table)
        for table in ("focus_areas", "style_directions", "training_types", "target_groups")
    }
    target_group_rows = catalog_rows["target_groups"]

    skill_weights = _match_skills_in_text(text, skill_rows)
    focus_weights = _match_catalog_names_in_text(text, catalog_rows["focus_areas"], weight=0.88)
    style_weights = _match_catalog_names_in_text(text, catalog_rows["style_directions"], weight=0.82)
    training_type_weights = _match_catalog_names_in_text(text, catalog_rows["training_types"], weight=0.82)
    target_group_weights = _match_catalog_names_in_text(text, target_group_rows, weight=0.8)

    partner_intent = re.search(r"\bpartner\b|\bpaar\b|\bpaarweise\b|\bzu zweit\b", text)
    if partner_intent:
        # Only the first partner-like target group (in load order) is boosted.
        partner_group_id = next(
            (
                group_id
                for group_id, group_name, _length in target_group_rows
                if "partner" in group_name or "paar" in group_name
            ),
            None,
        )
        if partner_group_id is not None:
            target_group_weights[partner_group_id] = max(
                target_group_weights.get(partner_group_id, 0.0), 0.9
            )

    return focus_weights, style_weights, training_type_weights, target_group_weights, skill_weights
def merge_text_signal_summary(
    summary: Mapping[str, Any],
    *,
    text_sources: Sequence[str],
    matched_skills: Sequence[Mapping[str, Any]],
    max_skills: int = 8,
) -> Dict[str, Any]:
    """Return a copy of ``summary`` enriched with text-signal details.

    The input mapping is never mutated. ``text_signal_sources`` is added only
    when ``text_sources`` is non-empty, and ``text_signal_skills`` only when
    at least one matched skill remains after truncation.

    Args:
        summary: Existing summary mapping; shallow-copied.
        text_sources: Labels of the texts that contributed signals.
        matched_skills: Matched skill descriptors, most relevant first.
        max_skills: Maximum number of matched skills to include. Defaults to
            8; negative values are treated as 0.

    Returns:
        A new dict holding the summary plus the optional text-signal keys.
    """
    merged = dict(summary)
    if text_sources:
        merged["text_signal_sources"] = list(text_sources)
    if matched_skills:
        # Clamp so a negative cap cannot turn the slice into "all but the last N".
        kept_skills = list(matched_skills)[: max(0, max_skills)]
        if kept_skills:
            merged["text_signal_skills"] = kept_skills
    return merged
# Public entry points of this module; the underscore-prefixed helpers above
# are deliberately left out of the star-import surface.
__all__ = [
    "load_framework_planning_text_parts",
    "merge_text_signal_summary",
    "resolve_planning_text_to_catalog_weights",
]