""" Phase B: Deterministische Text→Katalog-Signale für PlanningTargetProfile. Mappt Abschnitts-guidance, Rahmen-Ziele/-Notizen und Programmbeschreibung auf Skill-/Katalog-Gewichte (ohne LLM). """ from __future__ import annotations import re from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple _MIN_SKILL_NAME_LEN = 3 _MAX_SKILL_MATCHES = 12 _MAX_CATALOG_MATCHES = 6 def _normalize_text_blob(*parts: Optional[str]) -> str: chunks: List[str] = [] for p in parts: s = (p or "").strip() if s: chunks.append(s) return "\n".join(chunks).lower() def _load_skills_for_text_match(cur) -> List[Tuple[int, str, int]]: cur.execute( """ SELECT id, name FROM skills WHERE (status IS NULL OR status = 'active') AND name IS NOT NULL AND TRIM(name) <> '' ORDER BY LENGTH(name) DESC, name ASC """ ) out: List[Tuple[int, str, int]] = [] for row in cur.fetchall(): name = str(row.get("name") or "").strip() if len(name) < _MIN_SKILL_NAME_LEN: continue out.append((int(row["id"]), name.lower(), len(name))) return out def _load_catalog_names(cur, table: str, id_col: str = "id", name_col: str = "name") -> List[Tuple[int, str, int]]: cur.execute( f""" SELECT {id_col} AS id, {name_col} AS name FROM {table} WHERE {name_col} IS NOT NULL AND TRIM({name_col}) <> '' ORDER BY LENGTH({name_col}) DESC, {name_col} ASC """ ) out: List[Tuple[int, str, int]] = [] for row in cur.fetchall(): name = str(row.get("name") or "").strip() if len(name) < 2: continue out.append((int(row["id"]), name.lower(), len(name))) return out def _match_catalog_names_in_text( text: str, catalog_rows: Sequence[Tuple[int, str, int]], *, weight: float = 0.85, limit: int = _MAX_CATALOG_MATCHES, ) -> Dict[int, float]: if not text or not catalog_rows: return {} out: Dict[int, float] = {} for cid, name_lower, _ in catalog_rows: if len(out) >= limit: break if len(name_lower) < 2: continue if name_lower in text: out[cid] = max(out.get(cid, 0.0), weight) return out def _match_skills_in_text( text: str, skill_rows: Sequence[Tuple[int, str, int]], *, limit: int = _MAX_SKILL_MATCHES, ) -> Dict[int, float]: if not text or not skill_rows: return {} out: Dict[int, float] = {} for sid, name_lower, name_len in skill_rows: if len(out) >= limit: break if name_len < _MIN_SKILL_NAME_LEN: continue if name_lower in text: w = min(1.0, 0.72 + min(name_len, 20) * 0.012) out[sid] = max(out.get(sid, 0.0), w) return out def load_framework_planning_text_parts( cur, framework_program_id: int, *, slot_id: Optional[int] = None, ) -> List[str]: """Sammelt Rahmen-Texte für Text-Signal-Matching.""" parts: List[str] = [] cur.execute( "SELECT description FROM training_framework_programs WHERE id = %s", (int(framework_program_id),), ) row = cur.fetchone() if row and (row.get("description") or "").strip(): parts.append(str(row["description"]).strip()) cur.execute( """ SELECT title, notes FROM training_framework_goals WHERE framework_program_id = %s ORDER BY sort_order ASC """, (int(framework_program_id),), ) for g in cur.fetchall(): t = (g.get("title") or "").strip() n = (g.get("notes") or "").strip() if t: parts.append(t) if n: parts.append(n) if slot_id: cur.execute( "SELECT title, notes FROM training_framework_slots WHERE id = %s", (int(slot_id),), ) srow = cur.fetchone() if srow: st = (srow.get("title") or "").strip() sn = (srow.get("notes") or "").strip() if st: parts.append(st) if sn: parts.append(sn) return parts def resolve_planning_text_to_catalog_weights( cur, text_blob: str, ) -> Tuple[Dict[int, float], Dict[int, float], Dict[int, float], Dict[int, float], Dict[int, float]]: """ Returns: focus, style, training_type, target_group, skill weight maps. """ text = _normalize_text_blob(text_blob) if not text or len(text) < 3: return {}, {}, {}, {}, {} skill_rows = _load_skills_for_text_match(cur) focus_rows = _load_catalog_names(cur, "focus_areas") style_rows = _load_catalog_names(cur, "style_directions") tt_rows = _load_catalog_names(cur, "training_types") tg_rows = _load_catalog_names(cur, "target_groups") skills = _match_skills_in_text(text, skill_rows) focus = _match_catalog_names_in_text(text, focus_rows, weight=0.88) style = _match_catalog_names_in_text(text, style_rows, weight=0.82) tt = _match_catalog_names_in_text(text, tt_rows, weight=0.82) tg = _match_catalog_names_in_text(text, tg_rows, weight=0.8) if re.search(r"\bpartner\b|\bpaar\b|\bpaarweise\b|\bzu zweit\b", text): for gid, name_lower, _ in tg_rows: if "partner" in name_lower or "paar" in name_lower: tg[gid] = max(tg.get(gid, 0.0), 0.9) break return focus, style, tt, tg, skills def merge_text_signal_summary( summary: Mapping[str, Any], *, text_sources: Sequence[str], matched_skills: Sequence[Mapping[str, Any]], ) -> Dict[str, Any]: out = dict(summary) if text_sources: out["text_signal_sources"] = list(text_sources) if matched_skills: out["text_signal_skills"] = list(matched_skills)[:8] return out __all__ = [ "load_framework_planning_text_parts", "merge_text_signal_summary", "resolve_planning_text_to_catalog_weights", ]