- Added support for style direction mappings in the backend, allowing for improved categorization of exercises. - Introduced a new function to normalize property synonyms, enhancing the mapping of exercise properties. - Updated the exercise catalog assignment logic to include style directions, ensuring proper database entries. - Enhanced the ExercisesListPage with new filtering options for style directions, improving user experience and search capabilities.
420 lines
15 KiB
Python
420 lines
15 KiB
Python
"""
Semantic MediaWiki → Shinkan field mapper.

Converts SMW properties from karatetrainer.net into local database fields.
Property names were determined on real wiki pages via discover_properties().

Discovered categories:
    Exercises: category "Übungen" (also "Übungen Karate", "Übungen allgemein")
    Skills:    "Fähigkeitsbeschreibung"
    Methods:   "Methodenbeschreibung"
"""
|
||
import re
|
||
import logging
|
||
from typing import Optional
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
# ------------------------------------------------------------------ #
|
||
# CapabilityLevel Integer → benannte Stufen #
|
||
# ------------------------------------------------------------------ #
|
||
|
||
# Mapping: SMW-Integer → Shinkan-Stufenname
|
||
# SMW delivers the capability level as an integer string ("1" … "5");
# each one is translated into the named Shinkan stage slug listed here,
# in the same order.
CAPABILITY_LEVEL_MAP = {
    str(rank): slug
    for rank, slug in enumerate(
        ("basis", "grundlagen", "aufbau", "fortgeschritten", "optimierung"),
        start=1,
    )
}
|
||
|
||
|
||
# ------------------------------------------------------------------ #
|
||
# SMW Property → lokales Feld #
|
||
# Echte Namen von karatetrainer.net (via discover_properties) #
|
||
# ------------------------------------------------------------------ #
|
||
|
||
# Übungen (exercises)
|
||
# Maps the SMW property name of an exercise page to the mapper target field.
EXERCISE_PROPERTY_MAP = {
    # --- Core fields ---
    "Übungsbezeichnung": "title_override",  # exercise name (preferred over the page title)
    "Ziel": "goal",
    "Durchführung": "execution",
    "Summary": "summary",
    "Hinweise": "trainer_notes",
    "Plandauer": "duration_raw",  # number of minutes, e.g. "10"
    "Gruppengröße": "group_size_raw",  # number, e.g. "2"
    "Hilfsmittel": "equipment_raw",  # comma-separated list or a single value
    "Schlüsselworte": "keywords_raw",  # keywords (not stored in the DB directly; kept for future tags)

    # --- Catalog fields (name → ID lookup) ---
    "Übungstyp": "focus_area_names",  # "Karate" → focus_area
    "Zielgruppe": "target_group_names",
    "Altersgruppe": "age_group_names",
    "Trainingsmethode": "method_names",  # wiki page name, e.g. "Plyometrisches_Training"
    "Stilrichtung": "style_names",  # e.g. Shotokan; see EXERCISE_PROPERTY_SYNONYM_TO_TARGET

    # --- Skills (as names + level) ---
    "PrimaryCapability": "skill_names",  # skill names (there may be several)
    "CapabilityLevel": "skill_levels_raw",  # integer levels ["3", "2"] → aufbau, grundlagen

    # --- Further fields (optional) ---
    "Graduierung": "graduierung",  # "0 - Anfänger" (future use)
    "Lernstufe": "lernstufe",  # "Lernstufe_1_-_Erlernen_und_Festigen"
}
|
||
|
||
# Fähigkeiten (skills) – Kategorie: Fähigkeitsbeschreibung
|
||
# Maps the SMW property name of a skill page to the mapper target field.
SKILL_PROPERTY_MAP = {
    "Summary": "description",
    "KarateRelevanz": "karate_relevance",  # gets appended to the description
    "RelevanzLevel": "relevance_level",  # 1-3, not stored in the skills DB directly
}
|
||
|
||
# Trainingsmethoden – Kategorie: Methodenbeschreibung
|
||
# Maps the SMW property name of a training-method page to the mapper target field.
METHOD_PROPERTY_MAP = {
    "Summary": "description",
    "Kurzbezeichnung": "code",  # abbreviation, e.g. "DM"
    "KarateRelevanz": "karate_relevance",
    "PrimaryCapability": "skill_names",  # linked skills
}
|
||
|
||
|
||
# ------------------------------------------------------------------ #
|
||
# Wikitext → Plaintext #
|
||
# ------------------------------------------------------------------ #
|
||
|
||
def wikitext_to_plaintext(wikitext: str) -> str:
    """Strip MediaWiki markup from *wikitext* and return readable plain text.

    Handles external links, internal links (with and without alias),
    single-line templates, bold/italic quotes, HTML tags, headings and
    list markers, then collapses runs of blank lines.

    Args:
        wikitext: Raw wikitext. ``None`` yields an empty string; any other
            non-string value (e.g. a number) is converted with ``str()``
            so it does not crash the regex substitutions.

    Returns:
        The cleaned text with leading/trailing whitespace removed.
    """
    if wikitext is None:
        return ""
    text = wikitext if isinstance(wikitext, str) else str(wikitext)

    # External links: [https://example.com Text] → Text
    text = re.sub(r'\[https?://\S+\s+([^\]]+)\]', r'\1', text)

    # Aliased internal links first: [[Link|Text]] → Text. The alias-free
    # pattern below would otherwise keep the "Link|Text" pair verbatim.
    text = re.sub(r'\[\[([^|\]]+)\|([^\]]+)\]\]', r'\2', text)

    # Internal links without alias: [[Link]] → Link (underscores → spaces)
    text = re.sub(r'\[\[([^\]]+)\]\]', lambda m: m.group(1).replace('_', ' '), text)

    # Drop templates (only those without a nested closing brace)
    text = re.sub(r'\{\{[^}]+\}\}', '', text)

    # Bold before italic, so ''' is not consumed as '' plus a stray quote
    text = re.sub(r"'''(.+?)'''", r'\1', text)
    text = re.sub(r"''(.+?)''", r'\1', text)

    # <br> becomes a newline before all remaining HTML tags are removed
    text = re.sub(r'<br\s*/?>', '\n', text, flags=re.IGNORECASE)
    text = re.sub(r'<[^>]+>', '', text)

    # Headings: == Title == → Title on its own line
    text = re.sub(r'={2,6}\s*(.+?)\s*={2,6}', r'\n\1\n', text)

    # Normalise list/indent markers at line start to "- "
    text = re.sub(r'^[*#:;]+\s*', '- ', text, flags=re.MULTILINE)

    # Collapse three or more newlines into one blank line
    text = re.sub(r'\n{3,}', '\n\n', text)

    return text.strip()
|
||
|
||
|
||
def wiki_name_to_label(wiki_name: str) -> str:
    """Turn a wiki page name into a readable label.

    Underscores become spaces and surrounding whitespace is removed,
    e.g. ``Plyometrisches_Training`` → ``Plyometrisches Training``.
    """
    spaced = " ".join(wiki_name.split("_"))
    return spaced.strip()
|
||
|
||
|
||
# ------------------------------------------------------------------ #
|
||
# Parsing-Hilfsfunktionen #
|
||
# ------------------------------------------------------------------ #
|
||
|
||
def parse_duration(raw: str) -> tuple[Optional[int], Optional[int]]:
    """Parse a planned duration in minutes into a ``(min, max)`` pair.

    Examples:
        ``"10"``    → ``(10, 10)``  (a single number is both min and max)
        ``"10-15"`` → ``(10, 15)``
        ``"15-10"`` → ``(10, 15)``  (a reversed range is put in order)

    Args:
        raw: Raw property value. Non-string values (e.g. an ``int``) are
            converted with ``str()`` before parsing.

    Returns:
        ``(None, None)`` when *raw* is ``None`` or contains no digits;
        otherwise the first one or two integers found, smaller one first.
        Any further numbers are ignored.
    """
    if raw is None:
        return None, None

    # str() keeps numeric values from raising TypeError inside re.findall.
    numbers = re.findall(r'\d+', str(raw))
    if not numbers:
        return None, None

    low = int(numbers[0])
    high = int(numbers[1]) if len(numbers) > 1 else low
    if low > high:
        # Callers store the pair as duration_min / duration_max.
        low, high = high, low
    return low, high
|
||
|
||
|
||
def parse_group_size(raw: str) -> tuple[Optional[int], Optional[int]]:
    """Parse a group size into a ``(min, max)`` pair.

    Examples:
        ``"2"``   → ``(2, None)``  (a single number is a minimum; no maximum given)
        ``"2-4"`` → ``(2, 4)``
        ``"6-4"`` → ``(4, 6)``     (a reversed range is put in order)

    Args:
        raw: Raw property value. Non-string values (e.g. an ``int``) are
            converted with ``str()`` before parsing.

    Returns:
        ``(None, None)`` when *raw* is ``None`` or contains no digits;
        ``(n, None)`` for a single number; otherwise the first two
        integers found, smaller one first.
    """
    if raw is None:
        return None, None

    # str() keeps numeric values from raising TypeError inside re.findall.
    numbers = re.findall(r'\d+', str(raw))
    if not numbers:
        return None, None

    first = int(numbers[0])
    if len(numbers) == 1:
        # Only a minimum is known; the maximum stays open.
        return first, None

    second = int(numbers[1])
    return min(first, second), max(first, second)
|
||
|
||
|
||
def parse_equipment(raw: list[str]) -> list[str]:
    """Flatten and clean a list of equipment values.

    Every entry is split on ``,``, ``;`` and ``/``; each fragment is
    trimmed and run through ``wiki_name_to_label``. Fragments that end
    up empty are dropped. Input order is preserved.
    """
    fragments = (
        piece
        for entry in raw
        for piece in re.split(r'[,;/]', entry)
    )
    labels = (wiki_name_to_label(piece.strip()) for piece in fragments)
    return [label for label in labels if label]
|
||
|
||
|
||
def map_capability_level(level_str: str) -> str:
    """Translate an integer level string into its stage slug: "3" → "aufbau".

    Surrounding whitespace is ignored. Any value that is not a key of
    ``CAPABILITY_LEVEL_MAP`` falls back to ``"basis"``.
    """
    key = level_str.strip()
    if key in CAPABILITY_LEVEL_MAP:
        return CAPABILITY_LEVEL_MAP[key]
    return "basis"
|
||
|
||
|
||
# ------------------------------------------------------------------ #
|
||
# SMW-Property-Label → Mapper-Zielfeld (Werte wie in EXERCISE_PROPERTY_MAP) #
|
||
# browse_subject liefert Anzeigenamen, nicht zwingend interne Property-IDs. #
|
||
# ------------------------------------------------------------------ #
|
||
|
||
def _norm_prop_synonym(name: str) -> str:
|
||
s = (name or "").strip().lower()
|
||
for a, b in (("ä", "ae"), ("ö", "oe"), ("ü", "ue"), ("ß", "ss")):
|
||
s = s.replace(a, b)
|
||
return "".join(c for c in s if c.isalnum())
|
||
|
||
|
||
# alternative Labels → Zielfeld-Name (gleiche Strings wie Werte in EXERCISE_PROPERTY_MAP)
|
||
# Alternative (normalised) property labels → target field name.
# The target strings are the same ones used as values in EXERCISE_PROPERTY_MAP.
EXERCISE_PROPERTY_SYNONYM_TO_TARGET: dict[str, str] = {
    **dict.fromkeys(
        (
            "primarycapability",
            "hauptfaehigkeit",
            "primaerefaehigkeit",
            "hauptfhigkeit",
            "hauptfahigkeit",
        ),
        "skill_names",
    ),
    **dict.fromkeys(
        (
            "capabilitylevel",
            "faehigkeitsstufe",
            "faehigkeitslevel",
            "capabilitystufe",
        ),
        "skill_levels_raw",
    ),
    **dict.fromkeys(
        (
            "stilrichtung",
            "trainingsstilrichtung",
        ),
        "style_names",
    ),
}
|
||
|
||
|
||
def _exercise_property_target(prop_name: str) -> str | None:
    """Resolve the target field for an SMW property name.

    Lookup order:
      1. exact match in ``EXERCISE_PROPERTY_MAP``;
      2. normalised match in ``EXERCISE_PROPERTY_SYNONYM_TO_TARGET``;
      3. substring heuristic on the lower-cased name for capability
         properties ("capab" + "level", or "capab" + "primary").

    Returns:
        The target field name, or ``None`` when the property is unknown.
    """
    try:
        return EXERCISE_PROPERTY_MAP[prop_name]
    except KeyError:
        pass

    normalized = _norm_prop_synonym(prop_name)
    try:
        return EXERCISE_PROPERTY_SYNONYM_TO_TARGET[normalized]
    except KeyError:
        pass

    lowered = (prop_name or "").lower()
    if "capab" not in lowered:
        return None
    if "level" in lowered:
        return "skill_levels_raw"
    if "primary" in lowered:
        return "skill_names"
    return None
|
||
|
||
|
||
# ------------------------------------------------------------------ #
|
||
# Haupt-Mapping-Funktion #
|
||
# ------------------------------------------------------------------ #
|
||
|
||
def map_wiki_to_exercise(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """Convert the SMW properties of a wiki page into an exercise dict.

    Args:
        page_title: Title of the wiki page; used as the title unless a
            title-override property is present, and as ``import_id``.
        wiki_page_id: Internal MediaWiki page ID.
        smw_props: ``{property_name: [value, ...]}`` as returned by
            ``SmwClient.browse_subject()``. A bare (non-list) value is
            treated as a one-element list.

    Returns:
        Dict with the mapped fields plus the catalog name lists that are
        later resolved to IDs. Properties with an empty value or without
        a known target are skipped.
    """
    # Targets holding a single wikitext value that is converted to plain text.
    text_targets = ("goal", "execution", "summary", "trainer_notes")
    # Targets holding a list of wiki names, stored under the same key.
    label_list_targets = (
        "focus_area_names",
        "target_group_names",
        "age_group_names",
        "method_names",
        "style_names",
        "skill_names",
    )

    mapped: dict = {
        "title": page_title,
        "wiki_page_id": wiki_page_id,
        # Tracking
        "import_source": "mediawiki",
        "import_id": page_title,
        # Defaults
        "visibility": "private",
        "status": "draft",
        # Catalog references (name → ID lookup happens in the router)
        "focus_area_names": [],
        "target_group_names": [],
        "age_group_names": [],
        "skill_names": [],
        "skill_levels_raw": [],  # integer strings, e.g. ["3", "2"]
        "style_names": [],
        "method_names": [],
        # Equipment
        "equipment": [],
        # Warnings for unknown catalog values
        "warnings": [],
    }

    for prop_name, values in smw_props.items():
        if not values:
            continue

        target = _exercise_property_target(prop_name)
        if not target:
            continue

        # Normalise once: every branch below needs either the first value
        # or the complete list.
        value_list = values if isinstance(values, list) else [values]
        first_value = value_list[0]

        if target == "title_override":
            mapped["title"] = wiki_name_to_label(first_value)
        elif target in text_targets:
            mapped[target] = wikitext_to_plaintext(first_value)
        elif target == "duration_raw":
            mapped["duration_min"], mapped["duration_max"] = parse_duration(first_value)
        elif target == "group_size_raw":
            mapped["group_size_min"], mapped["group_size_max"] = parse_group_size(first_value)
        elif target == "equipment_raw":
            mapped["equipment"] = parse_equipment(value_list)
        elif target == "keywords_raw":
            # Kept for a later tag implementation.
            mapped["keywords"] = [wiki_name_to_label(entry) for entry in value_list]
        elif target in label_list_targets:
            mapped[target] = [wiki_name_to_label(entry) for entry in value_list]
        elif target == "skill_levels_raw":
            mapped["skill_levels_raw"] = list(value_list)

    return mapped
|
||
|
||
|
||
def build_skill_assignments(mapped: dict) -> list[dict]:
    """Build skill assignments from the mapped skill names and raw levels.

    ``skill_levels_raw`` is matched to ``skill_names`` by position, e.g.
    levels ``[3, 2]`` belong to skills ``[Schnellkraft,
    Schnelligkeitsausdauer]``. A skill without a level at its position
    is treated as level "1".

    Returns:
        One dict per skill with ``skill_name``, ``target_level`` (the slug
        from ``map_capability_level``), ``required_level`` and
        ``intensity`` (both ``None``) and ``is_primary`` (true only for
        the first skill).
    """
    names = mapped.get("skill_names", [])
    raw_levels = mapped.get("skill_levels_raw", [])

    def _slug_at(position: int) -> str:
        """Return the stage slug for the skill at *position*."""
        level_value = raw_levels[position] if position < len(raw_levels) else "1"
        try:
            level_key = str(level_value).strip()
        except (TypeError, AttributeError):
            level_key = "1"
        if not level_key:
            return "basis"
        return map_capability_level(level_key)

    return [
        {
            "skill_name": name,
            "target_level": _slug_at(position),
            "required_level": None,
            "intensity": None,
            "is_primary": position == 0,
        }
        for position, name in enumerate(names)
    ]
|
||
|
||
|
||
def map_wiki_to_skill(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """Convert the SMW properties of a skill-description page into a skill dict.

    The description is assembled from the property mapped to
    ``description`` (placed first) and the one mapped to
    ``karate_relevance`` (appended under a "Karate-Relevanz:" label).
    Other mapped targets are not copied into the result.

    Args:
        page_title: Wiki page title; used as ``name`` and ``import_id``.
        wiki_page_id: Internal MediaWiki page ID.
        smw_props: ``{property_name: [value, ...]}``; only the first value
            of each property is used.

    Returns:
        Dict with tracking fields, an empty ``warnings`` list and, when
        any text was found, ``description``.
    """
    mapped = {
        "name": page_title,
        "wiki_page_id": wiki_page_id,
        "import_source": "mediawiki",
        "import_id": page_title,
        "warnings": [],
    }

    description_parts = []

    for prop_name, values in smw_props.items():
        target = SKILL_PROPERTY_MAP.get(prop_name) if values else None
        if not target:
            continue

        first_value = values[0] if isinstance(values, list) else values

        if target == "description":
            # The summary always leads, whatever the property order was.
            description_parts.insert(0, wikitext_to_plaintext(first_value))
        elif target == "karate_relevance":
            rel = wikitext_to_plaintext(first_value)
            description_parts.append(f"\nKarate-Relevanz: {rel}")

    if description_parts:
        mapped["description"] = "\n".join(description_parts).strip()

    return mapped
|
||
|
||
|
||
def map_wiki_to_method(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """Convert the SMW properties of a method-description page into a method dict.

    The description is assembled from the property mapped to
    ``description`` (placed first) and the one mapped to
    ``karate_relevance`` (appended under a "Karate-Relevanz:" label).

    Args:
        page_title: Wiki page title; used as ``name`` and ``import_id``.
        wiki_page_id: Internal MediaWiki page ID.
        smw_props: ``{property_name: [value, ...]}``. A bare (non-list)
            value is treated as a one-element list.

    Returns:
        Dict with tracking fields, an empty ``warnings`` list and, when
        the corresponding properties are present, ``description``,
        ``code`` and ``skill_names``.
    """
    mapped = {
        "name": page_title,
        "wiki_page_id": wiki_page_id,
        "import_source": "mediawiki",
        "import_id": page_title,
        "warnings": [],
    }

    description_parts = []

    for prop_name, values in smw_props.items():
        if not values:
            continue
        target = METHOD_PROPERTY_MAP.get(prop_name)
        if not target:
            continue

        value_list = values if isinstance(values, list) else [values]
        first_value = value_list[0]

        if target == "description":
            # The summary always leads, whatever the property order was.
            description_parts.insert(0, wikitext_to_plaintext(first_value))
        elif target == "code":
            mapped["code"] = first_value.strip()
        elif target == "karate_relevance":
            rel = wikitext_to_plaintext(first_value)
            description_parts.append(f"\nKarate-Relevanz: {rel}")
        elif target == "skill_names":
            # METHOD_PROPERTY_MAP declares this target, but it was never
            # copied into the result, so linked skills were silently lost.
            # NOTE(review): confirm the consumer of this dict reads
            # "skill_names" for methods.
            mapped["skill_names"] = [wiki_name_to_label(entry) for entry in value_list]

    if description_parts:
        mapped["description"] = "\n".join(description_parts).strip()

    return mapped
|