shinkan-jinkendo/backend/smw_mapper.py
Lars 1d698e4b0a
Some checks failed
Deploy Development / deploy (push) Failing after 22s
Test Suite / pytest-backend (push) Successful in 35s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Failing after 3s
Test Suite / k6 /health Baseline (push) Successful in 33s
Test Suite / playwright-tests (push) Successful in 1m14s
Implement Phase 3 Enhancements for Skill Scoring and Profiles
- Added capabilities for weighted skill profiles, allowing trainers to compare training modules, frameworks, and regression paths based on skill contributions.
- Updated the skill scoring specification to include peer context separation and list filtering, ensuring accurate comparisons among visible artifacts of the same type.
- Enhanced the API to support batch summaries for skill profiles and discovery suggestions, improving data retrieval efficiency.
- Refactored frontend components to display skill metrics, including scores and peer percentages, with improved filtering options for better user experience.
- Updated documentation to reflect the latest changes and enhancements in the skill scoring system.
2026-05-21 12:35:45 +02:00

463 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Semantic MediaWiki → Shinkan Field Mapper
Wandelt SMW-Properties von karatetrainer.net in lokale DB-Felder um.
Property-Namen wurden via discover_properties() auf echten Wiki-Seiten ermittelt.
Entdeckte Kategorien:
Übungen: Kategorie: Übungen (auch "Übungen Karate", "Übungen allgemein")
Fähigkeiten: Fähigkeitsbeschreibung
Methoden: Methodenbeschreibung
"""
import re
import logging
from typing import Optional
logger = logging.getLogger(__name__)
# ------------------------------------------------------------------ #
# CapabilityLevel Integer → benannte Stufen #
# ------------------------------------------------------------------ #
# Mapping: SMW integer (as a string, "1" to "5") -> Shinkan level slug.
# The slugs are listed in level order; enumerate() supplies the numeric keys.
CAPABILITY_LEVEL_MAP = {
    str(rank): slug
    for rank, slug in enumerate(
        ("basis", "grundlagen", "aufbau", "fortgeschritten", "optimierung"),
        start=1,
    )
}
# ------------------------------------------------------------------ #
# SMW Property → lokales Feld #
# Echte Namen von karatetrainer.net (via discover_properties) #
# ------------------------------------------------------------------ #
# Exercises: maps the SMW property name found on an exercise page to the
# mapper target field that map_wiki_to_exercise() dispatches on.
EXERCISE_PROPERTY_MAP = {
    # Core fields
    "Übungsbezeichnung": "title_override",  # exercise name (preferred over the page title)
    "Ziel": "goal",
    "Durchführung": "execution",
    "Summary": "summary",
    "Hinweise": "trainer_notes",
    "Plandauer": "duration_raw",  # number of minutes, e.g. "10"
    "Gruppengröße": "group_size_raw",  # number, e.g. "2"
    "Hilfsmittel": "equipment_raw",  # comma-separated list / single value
    "Schlüsselworte": "keywords_raw",  # keywords (not stored directly in the DB; kept for future tags)
    # Catalog fields (name -> ID lookup)
    "Übungstyp": "focus_area_names",  # e.g. "Karate" -> focus_area
    "Zielgruppe": "target_group_names",
    "Altersgruppe": "age_group_names",
    "Trainingsmethode": "method_names",  # wiki page name, e.g. "Plyometrisches_Training"
    "Stilrichtung": "style_names",  # e.g. Shotokan; see EXERCISE_PROPERTY_SYNONYM_TO_TARGET
    # Skills (as names + level)
    "PrimaryCapability": "skill_names",  # skill names (there may be several)
    "CapabilityLevel": "skill_levels_raw",  # integer levels ["3", "2"] -> aufbau, grundlagen
    # Further fields (optional); map_wiki_to_exercise() has no branch for
    # these two targets, so their values are currently dropped.
    "Graduierung": "graduierung",  # e.g. "0 - Anfänger" (future use)
    "Lernstufe": "lernstufe",  # e.g. "Lernstufe_1_-_Erlernen_und_Festigen"
}
# Skills (wiki category "Fähigkeitsbeschreibung"): SMW property name ->
# mapper target field used by map_wiki_to_skill().
SKILL_PROPERTY_MAP = {
    "Summary": "description",
    "KarateRelevanz": "karate_relevance",  # column skills.karate_relevance (+ wiki import)
    "RelevanzLevel": "relevance_level",  # column skills.relevance_level; parse_wiki_relevance_level() accepts 1-3
}
# Training methods (wiki category "Methodenbeschreibung"): SMW property name ->
# mapper target field used by map_wiki_to_method().
METHOD_PROPERTY_MAP = {
    "Summary": "description",
    "Kurzbezeichnung": "code",  # abbreviation, e.g. "DM"
    "KarateRelevanz": "karate_relevance",
    # Linked skills. NOTE(review): map_wiki_to_method() has no branch for the
    # "skill_names" target, so these values are currently ignored.
    "PrimaryCapability": "skill_names",
}
# ------------------------------------------------------------------ #
# Wikitext → Plaintext #
# ------------------------------------------------------------------ #
def wikitext_to_plaintext(wikitext: str) -> str:
    """Strip common wikitext markup and return readable plain text.

    Handled constructs, in this order: external links with a label, internal
    links (with and without alias), ``{{...}}`` templates, bold/italic quotes,
    ``<br>`` and other HTML tags, ``== headings ==`` and list markers.

    Args:
        wikitext: Raw wikitext. ``None`` yields an empty string; any other
            non-string value is converted with ``str()`` so that numeric
            property values do not raise inside ``re.sub``.

    Returns:
        The cleaned text with surrounding whitespace removed.
    """
    if wikitext is None:
        return ""
    text = str(wikitext)
    # External links: [https://example.com Text] -> Text
    text = re.sub(r'\[https?://\S+\s+([^\]]+)\]', r'\1', text)
    # Internal links with alias: [[Link|Text]] -> Text
    text = re.sub(r'\[\[([^|\]]+)\|([^\]]+)\]\]', r'\2', text)
    # Internal links without alias: [[Link]] -> Link (underscores -> spaces)
    text = re.sub(r'\[\[([^\]]+)\]\]', lambda m: m.group(1).replace('_', ' '), text)
    # Drop templates (no nesting support: stops at the first closing brace)
    text = re.sub(r'\{\{[^}]+\}\}', '', text)
    # Bold first, then italic, so ''' is not misread as '' plus a stray quote
    text = re.sub(r"'''(.+?)'''", r'\1', text)
    text = re.sub(r"''(.+?)''", r'\1', text)
    # <br> becomes a line break; every other HTML tag is removed
    text = re.sub(r'<br\s*/?>', '\n', text, flags=re.IGNORECASE)
    text = re.sub(r'<[^>]+>', '', text)
    # Headings: == Title == -> Title on its own line
    text = re.sub(r'={2,6}\s*(.+?)\s*={2,6}', r'\n\1\n', text)
    # Normalise list markers. Only whitespace other than "\n" is consumed
    # after the marker; otherwise an empty list item would swallow the line
    # break and merge with the following line.
    text = re.sub(r'^[*#:;]+[^\S\n]*', '- ', text, flags=re.MULTILINE)
    # Collapse runs of blank lines
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text.strip()
def wiki_name_to_label(wiki_name: str) -> str:
    """Turn a wiki page name into a readable label.

    Underscores become spaces and surrounding whitespace is removed, e.g.
    ``Plyometrisches_Training`` -> ``Plyometrisches Training``.
    """
    spaced = " ".join(wiki_name.split("_"))
    return spaced.strip()
# ------------------------------------------------------------------ #
# Parsing-Hilfsfunktionen #
# ------------------------------------------------------------------ #
def parse_duration(raw: str) -> tuple[Optional[int], Optional[int]]:
    """Extract a (min, max) duration from a raw property value.

    Only the first two numbers in the text are considered:
        "10"    -> (10, 10)   a single number is used for both bounds
        "10-15" -> (10, 15)
    Empty input or text without digits yields (None, None).
    """
    if not raw:
        return None, None
    bounds = [int(token) for token in re.findall(r'\d+', raw)[:2]]
    if not bounds:
        return None, None
    lower = bounds[0]
    # A lone number serves as both minimum and maximum.
    upper = bounds[1] if len(bounds) > 1 else lower
    return lower, upper
def parse_group_size(raw: str) -> tuple[Optional[int], Optional[int]]:
    """Extract a (min, max) group size from a raw property value.

    Only the first two numbers in the text are considered:
        "2"   -> (2, None)   a single number is the minimum; no maximum given
        "2-4" -> (2, 4)
    Empty input or text without digits yields (None, None).
    """
    if not raw:
        return None, None
    tokens = re.findall(r'\d+', raw)
    if not tokens:
        return None, None
    minimum = int(tokens[0])
    maximum = int(tokens[1]) if len(tokens) > 1 else None
    return minimum, maximum
def parse_equipment(raw: list[str]) -> list[str]:
    """Normalise an equipment list.

    Each entry is split on ``,``, ``;`` and ``/``; every fragment is turned
    into a label via wiki_name_to_label() and empty fragments are dropped.
    """
    labels = (
        wiki_name_to_label(fragment.strip())
        for entry in raw
        for fragment in re.split(r'[,;/]', entry)
    )
    return [label for label in labels if label]
def map_capability_level(level_str: str) -> str:
    """Translate an integer level string into its level slug, e.g. "3" -> "aufbau".

    Surrounding whitespace is ignored. Values missing from
    CAPABILITY_LEVEL_MAP fall back to "basis".
    """
    key = level_str.strip()
    if key in CAPABILITY_LEVEL_MAP:
        return CAPABILITY_LEVEL_MAP[key]
    return "basis"
def parse_wiki_relevance_level(raw: str | None) -> Optional[int]:
"""
RelevanzLevel aus Wiki (typisch 13). Erlaubt Zahl oder Text mit Ziffer z.B. "Level_2".
"""
if raw is None:
return None
s = str(raw).strip()
if not s:
return None
digits = re.findall(r"\d+", s)
if not digits:
return None
try:
n = int(digits[0])
except ValueError:
return None
if n < 1 or n > 3:
return None
return n
# ------------------------------------------------------------------ #
# SMW-Property-Label → Mapper-Zielfeld (Werte wie in EXERCISE_PROPERTY_MAP) #
# browse_subject liefert Anzeigenamen, nicht zwingend interne Property-IDs. #
# ------------------------------------------------------------------ #
def _norm_prop_synonym(name: str) -> str:
s = (name or "").strip().lower()
for a, b in (("ä", "ae"), ("ö", "oe"), ("ü", "ue"), ("ß", "ss")):
s = s.replace(a, b)
return "".join(c for c in s if c.isalnum())
# Alternative SMW display labels -> target field (same targets as in
# SKILL_PROPERTY_MAP). Keys are in the form produced by _norm_prop_synonym():
# lower-case, umlauts transliterated, alphanumeric characters only.
SKILL_PROPERTY_SYNONYM_TO_TARGET: dict[str, str] = {
    "karaterelevanz": "karate_relevance",
    "karatebezug": "karate_relevance",
    "relevanzlevel": "relevance_level",
    "wikirelevanz": "karate_relevance",
}
def _skill_property_target(prop_name: str) -> str | None:
    """Resolve the target field for a skill SMW property.

    An exact match in SKILL_PROPERTY_MAP wins; otherwise the normalised label
    is looked up in SKILL_PROPERTY_SYNONYM_TO_TARGET. Returns None when the
    property is unknown.
    """
    try:
        return SKILL_PROPERTY_MAP[prop_name]
    except KeyError:
        pass
    return SKILL_PROPERTY_SYNONYM_TO_TARGET.get(_norm_prop_synonym(prop_name))
# Alternative labels -> target field name (same strings as the values in
# EXERCISE_PROPERTY_MAP). Keys are in the form produced by
# _norm_prop_synonym(): lower-case, umlauts transliterated, alphanumeric only.
EXERCISE_PROPERTY_SYNONYM_TO_TARGET: dict[str, str] = {
    "primarycapability": "skill_names",
    "hauptfaehigkeit": "skill_names",
    "primaerefaehigkeit": "skill_names",
    # NOTE(review): the next two look like spellings where the umlaut was
    # dropped or reduced to a plain "a" - confirm against real wiki labels.
    "hauptfhigkeit": "skill_names",
    "hauptfahigkeit": "skill_names",
    "capabilitylevel": "skill_levels_raw",
    "faehigkeitsstufe": "skill_levels_raw",
    "faehigkeitslevel": "skill_levels_raw",
    "capabilitystufe": "skill_levels_raw",
    "stilrichtung": "style_names",
    "trainingsstilrichtung": "style_names",
}
def _exercise_property_target(prop_name: str) -> str | None:
    """Resolve the target field for an exercise SMW property.

    Lookup order:
      1. exact property name in EXERCISE_PROPERTY_MAP,
      2. normalised label in EXERCISE_PROPERTY_SYNONYM_TO_TARGET,
      3. substring heuristics on the lower-cased name for capability
         properties ("primary" + "capab" without "level" -> skill names,
         "capab" + "level" -> skill levels).

    Returns None when the property is unknown.
    """
    try:
        return EXERCISE_PROPERTY_MAP[prop_name]
    except KeyError:
        pass

    synonym_target = EXERCISE_PROPERTY_SYNONYM_TO_TARGET.get(_norm_prop_synonym(prop_name))
    if synonym_target is not None:
        return synonym_target

    lowered = (prop_name or "").lower()
    mentions_capability = "capab" in lowered
    mentions_level = "level" in lowered
    if mentions_capability and "primary" in lowered and not mentions_level:
        return "skill_names"
    if mentions_capability and mentions_level:
        return "skill_levels_raw"
    return None
# ------------------------------------------------------------------ #
# Haupt-Mapping-Funktion #
# ------------------------------------------------------------------ #
def _smw_text_values(values) -> list[str]:
    """Return an SMW property value (list or bare scalar) as a list of strings, skipping None."""
    items = values if isinstance(values, list) else [values]
    return [str(item) for item in items if item is not None]


def map_wiki_to_exercise(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """Convert the SMW properties of a wiki page into an exercise dict.

    Args:
        page_title: Title of the wiki page; used as the title unless a
            title-override property is present, and as ``import_id``.
        wiki_page_id: Internal MediaWiki page id.
        smw_props: ``{property_name: [value, ...]}``; a bare scalar instead
            of a list is accepted as well. Properties with a falsy value or
            without a known target field are skipped.

    Returns:
        Dict with the mapped fields plus the catalog name lists
        (``*_names``), ``skill_levels_raw``, ``equipment`` and ``warnings``.
        If several properties resolve to the same target, the one iterated
        last wins.
    """
    mapped: dict = {
        "title": page_title,
        "wiki_page_id": wiki_page_id,
        # Tracking
        "import_source": "mediawiki",
        "import_id": page_title,
        # Defaults
        "visibility": "private",
        "status": "draft",
        # Catalog references (names only)
        "focus_area_names": [],
        "target_group_names": [],
        "age_group_names": [],
        "skill_names": [],
        "skill_levels_raw": [],  # integer strings such as ["3", "2"]
        "style_names": [],
        "method_names": [],
        # Equipment
        "equipment": [],
        # Warnings (this function itself never appends to the list)
        "warnings": [],
    }

    # Targets whose first value is free wikitext stored under the same key.
    text_targets = ("goal", "execution", "summary", "trainer_notes")
    # Targets holding wiki page names: target -> result key for the label list.
    label_list_keys = {
        "keywords_raw": "keywords",
        "focus_area_names": "focus_area_names",
        "target_group_names": "target_group_names",
        "age_group_names": "age_group_names",
        "method_names": "method_names",
        "style_names": "style_names",
        "skill_names": "skill_names",
    }

    for prop_name, values in smw_props.items():
        if not values:
            continue
        target = _exercise_property_target(prop_name)
        if not target:
            continue

        if target == "skill_levels_raw":
            # Kept raw and unfiltered so that positions stay aligned with
            # skill_names; build_skill_assignments() stringifies each entry.
            mapped["skill_levels_raw"] = list(values) if isinstance(values, list) else [values]
            continue

        # Coerce to strings so that numeric values do not break the
        # regex/str based parsers below.
        texts = _smw_text_values(values)
        if not texts:
            continue
        first_value = texts[0]

        if target == "title_override":
            mapped["title"] = wiki_name_to_label(first_value)
        elif target in text_targets:
            mapped[target] = wikitext_to_plaintext(first_value)
        elif target == "duration_raw":
            mapped["duration_min"], mapped["duration_max"] = parse_duration(first_value)
        elif target == "group_size_raw":
            mapped["group_size_min"], mapped["group_size_max"] = parse_group_size(first_value)
        elif target == "equipment_raw":
            mapped["equipment"] = parse_equipment(texts)
        elif target in label_list_keys:
            mapped[label_list_keys[target]] = [wiki_name_to_label(text) for text in texts]
        # Any other target (e.g. "graduierung", "lernstufe") is not consumed.
    return mapped
def build_skill_assignments(mapped: dict) -> list[dict]:
    """Build skill assignments from the mapped skill names and raw levels.

    ``skill_levels_raw`` is matched to ``skill_names`` by position, e.g.
    levels [3, 2] belong to skills [Schnellkraft, Schnelligkeitsausdauer].
    A skill without a level at its position is treated as level "1". Each
    level is translated to a level slug via map_capability_level().

    Returns:
        One dict per skill name with the keys ``skill_name``,
        ``target_level``, ``required_level``, ``intensity`` and ``is_primary``.
    """
    skill_names = mapped.get("skill_names", [])
    raw_levels = mapped.get("skill_levels_raw", [])

    def _slug_at(position: int) -> str:
        candidate = raw_levels[position] if position < len(raw_levels) else "1"
        try:
            text = str(candidate).strip()
        except (TypeError, AttributeError):
            text = "1"
        if not text:
            return "basis"
        return map_capability_level(text)

    return [
        {
            "skill_name": name,
            "target_level": _slug_at(position),
            "required_level": None,
            "intensity": "mittel",
            "is_primary": False,
        }
        for position, name in enumerate(skill_names)
    ]
def map_wiki_to_skill(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """Convert the SMW properties of a skill description page into a skill dict.

    Only the first value of each property is used. An unparsable relevance
    level is reported in ``warnings`` instead of being stored. The
    ``description`` key is added only when a non-empty description was found.
    """
    mapped = {
        "name": page_title,
        "wiki_page_id": wiki_page_id,
        "import_source": "mediawiki",
        "import_id": page_title,
        "warnings": [],
    }
    description_text: Optional[str] = None

    for prop_name, values in smw_props.items():
        if not values:
            continue
        target = _skill_property_target(prop_name)
        if not target:
            continue
        first_value = values[0] if isinstance(values, list) else values
        as_text = first_value if isinstance(first_value, str) else str(first_value)

        if target == "description":
            description_text = wikitext_to_plaintext(str(first_value))
            continue
        if target == "karate_relevance":
            mapped["karate_relevance"] = wikitext_to_plaintext(str(first_value))
            continue
        if target == "relevance_level":
            level = parse_wiki_relevance_level(as_text)
            if level is not None:
                mapped["relevance_level"] = level
            else:
                mapped["warnings"].append(f"Unbekanntes RelevanzLevel: {first_value!r}")

    # Added last so the description key follows the other mapped fields.
    if description_text:
        mapped["description"] = description_text
    return mapped
def map_wiki_to_method(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """Convert the SMW properties of a method description page into a method dict.

    Only exact property names from METHOD_PROPERTY_MAP are recognised and
    only the first value of each property is used. The karate relevance is
    not stored as its own field; it is appended to the description as a
    "Karate-Relevanz: ..." paragraph.

    Args:
        page_title: Title of the wiki page; used as ``name`` and ``import_id``.
        wiki_page_id: Internal MediaWiki page id.
        smw_props: ``{property_name: [value, ...]}``; a bare scalar instead
            of a list is accepted as well.

    Returns:
        Dict with ``name``, ``wiki_page_id``, ``import_source``, ``import_id``
        and ``warnings``, plus ``code`` and ``description`` when present.
    """
    mapped = {
        "name": page_title,
        "wiki_page_id": wiki_page_id,
        "import_source": "mediawiki",
        "import_id": page_title,
        "warnings": [],
    }
    description_parts = []
    for prop_name, values in smw_props.items():
        if not values:
            continue
        target = METHOD_PROPERTY_MAP.get(prop_name)
        if not target:
            continue
        first_value = values[0] if isinstance(values, list) else values
        # Coerce like map_wiki_to_skill() does, so a non-string value cannot
        # break .strip() or the regex based wikitext cleanup.
        first_text = str(first_value)
        if target == "description":
            # Always in front, regardless of the property iteration order.
            description_parts.insert(0, wikitext_to_plaintext(first_text))
        elif target == "code":
            mapped["code"] = first_text.strip()
        elif target == "karate_relevance":
            rel = wikitext_to_plaintext(first_text)
            description_parts.append(f"\nKarate-Relevanz: {rel}")
        # NOTE(review): METHOD_PROPERTY_MAP also yields "skill_names"
        # (PrimaryCapability), which is not handled here and is dropped -
        # confirm whether linked skills should be imported for methods.
    if description_parts:
        mapped["description"] = "\n".join(description_parts).strip()
    return mapped