"""
Semantic MediaWiki → Shinkan Field Mapper
Wandelt SMW-Properties von karatetrainer.net in lokale DB-Felder um.
Property-Namen wurden via discover_properties() auf echten Wiki-Seiten ermittelt.
Entdeckte Kategorien:
Übungen: Kategorie: Übungen (auch "Übungen Karate", "Übungen allgemein")
Fähigkeiten: Fähigkeitsbeschreibung
Methoden: Methodenbeschreibung
"""
import re
import logging
from typing import Optional
logger = logging.getLogger(__name__)
# ------------------------------------------------------------------ #
# CapabilityLevel Integer → benannte Stufen #
# ------------------------------------------------------------------ #
# Mapping: SMW CapabilityLevel (integer given as a string) → Shinkan level name.
CAPABILITY_LEVEL_MAP = {
    "1": "basis",
    "2": "grundlagen",
    "3": "aufbau",
    "4": "fortgeschritten",
    "5": "optimierung",
}
# ------------------------------------------------------------------ #
# SMW Property → lokales Feld #
# Echte Namen von karatetrainer.net (via discover_properties) #
# ------------------------------------------------------------------ #
# Exercises: SMW property name → intermediate target field of the mapper.
EXERCISE_PROPERTY_MAP = {
    # Core fields
    "Übungsbezeichnung": "title_override",  # exercise name (preferred over the page title)
    "Ziel": "goal",
    "Durchführung": "execution",
    "Summary": "summary",
    "Hinweise": "trainer_notes",
    "Plandauer": "duration_raw",  # number of minutes, e.g. "10"
    "Gruppengröße": "group_size_raw",  # number, e.g. "2"
    "Hilfsmittel": "equipment_raw",  # comma-separated list / single value
    "Schlüsselworte": "keywords_raw",  # keywords (not stored in the DB directly; kept for future tags)
    # Catalog fields (name → ID lookup)
    "Übungstyp": "focus_area_names",  # "Karate" → focus_area
    "Zielgruppe": "target_group_names",
    "Altersgruppe": "age_group_names",
    "Trainingsmethode": "method_names",  # wiki page name, e.g. "Plyometrisches_Training"
    "Stilrichtung": "style_names",  # e.g. Shotokan; see EXERCISE_PROPERTY_SYNONYM_TO_TARGET
    # Skills (as names + level)
    "PrimaryCapability": "skill_names",  # skill names (may be several)
    "CapabilityLevel": "skill_levels_raw",  # integer levels ["3", "2"] → aufbau, grundlagen
    # Further fields (optional)
    "Graduierung": "graduierung",  # "0 - Anfänger" (future use)
    "Lernstufe": "lernstufe",  # "Lernstufe_1_-_Erlernen_und_Festigen"
}
# Skills – wiki category: Fähigkeitsbeschreibung. SMW property name → target field.
SKILL_PROPERTY_MAP = {
    "Summary": "description",
    "KarateRelevanz": "karate_relevance",  # column skills.karate_relevance (+ wiki import)
    "RelevanzLevel": "relevance_level",  # column skills.relevance_level, 1–3
}
# Training methods – wiki category: Methodenbeschreibung. SMW property name → target field.
METHOD_PROPERTY_MAP = {
    "Summary": "description",
    "Kurzbezeichnung": "code",  # abbreviation, e.g. "DM"
    "KarateRelevanz": "karate_relevance",
    "PrimaryCapability": "skill_names",  # linked skills
}
# ------------------------------------------------------------------ #
# Wikitext → Plaintext #
# ------------------------------------------------------------------ #
def wikitext_to_plaintext(wikitext: str) -> str:
    """Strip wikitext markup and return readable plain text.

    The substitutions run in a fixed order: external links, internal links
    (with and without alias), templates, bold/italic quotes, HTML tags,
    headings and list markers. Finally, runs of three or more newlines are
    collapsed to a single blank line and the result is stripped.

    Args:
        wikitext: Raw wikitext. ``None`` or an empty string yields ``""``.

    Returns:
        The text with the markup listed above removed.
    """
    if not wikitext:
        return ""
    text = wikitext
    # External links: [https://example.com Text] → Text
    text = re.sub(r'\[https?://\S+\s+([^\]]+)\]', r'\1', text)
    # Internal links with alias: [[Link|Text]] → Text
    text = re.sub(r'\[\[([^|\]]+)\|([^\]]+)\]\]', r'\2', text)
    # Internal links without alias: [[Link]] → Link (underscores → spaces)
    text = re.sub(r'\[\[([^\]]+)\]\]', lambda m: m.group(1).replace('_', ' '), text)
    # Remove templates that contain no nested closing brace
    text = re.sub(r'\{\{[^}]+\}\}', '', text)
    # Bold must be handled before italic, because ''' also contains ''
    text = re.sub(r"'''(.+?)'''", r'\1', text)
    text = re.sub(r"''(.+?)''", r'\1', text)
    # HTML tags: a line-break tag (with or without a closing slash, any case)
    # becomes a newline; every other tag is dropped.
    text = re.sub(r'<br\s*/?>', '\n', text, flags=re.IGNORECASE)
    text = re.sub(r'<[^>]+>', '', text)
    # Headings: == Title == → Title on its own line
    text = re.sub(r'={2,6}\s*(.+?)\s*={2,6}', r'\n\1\n', text)
    # Normalize list / indentation markers at line start to "- "
    text = re.sub(r'^[*#:;]+\s*', '- ', text, flags=re.MULTILINE)
    # Collapse three or more consecutive newlines into one blank line
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text.strip()
def wiki_name_to_label(wiki_name: str) -> str:
    """Turn a wiki page name into a readable label.

    Underscores become spaces and surrounding whitespace is removed, e.g.
    ``"Plyometrisches_Training"`` → ``"Plyometrisches Training"``.
    """
    with_spaces = " ".join(wiki_name.split("_"))
    return with_spaces.strip()
# ------------------------------------------------------------------ #
# Parsing-Hilfsfunktionen #
# ------------------------------------------------------------------ #
def parse_duration(raw: str) -> tuple[Optional[int], Optional[int]]:
    """Parse a duration string into ``(minimum, maximum)``.

    Only the first two digit groups found in *raw* are used:

        "10"    → (10, 10)   a single number is both minimum and maximum
        "10-15" → (10, 15)

    A falsy *raw* or one without any digits yields ``(None, None)``.
    """
    found = re.findall(r'\d+', raw) if raw else []
    if not found:
        return None, None
    lower = int(found[0])
    # A single number serves as both bounds.
    upper = int(found[1]) if len(found) > 1 else lower
    return lower, upper
def parse_group_size(raw: str) -> tuple[Optional[int], Optional[int]]:
    """Parse a group-size string into ``(minimum, maximum)``.

    Only the first two digit groups found in *raw* are used:

        "2"   → (2, None)   a single number is the minimum; no maximum given
        "2-4" → (2, 4)

    A falsy *raw* or one without any digits yields ``(None, None)``.
    """
    if not raw:
        return None, None
    numbers = re.findall(r'\d+', raw)
    if not numbers:
        return None, None
    minimum = int(numbers[0])
    # Unlike a duration, a lone number does not imply an upper bound.
    maximum = int(numbers[1]) if len(numbers) > 1 else None
    return minimum, maximum
def parse_equipment(raw: list[str]) -> list[str]:
    """Flatten and clean a list of raw equipment values.

    Every entry is split on ``,``, ``;`` and ``/``; each piece is stripped
    and passed through ``wiki_name_to_label``. Pieces that end up empty are
    dropped. The original order is kept.
    """
    pieces = (piece for entry in raw for piece in re.split(r'[,;/]', entry))
    labels = (wiki_name_to_label(piece.strip()) for piece in pieces)
    return [label for label in labels if label]
def map_capability_level(level_str: str) -> str:
    """Translate an integer level string into its level slug, e.g. "3" → "aufbau".

    The input is stripped before the lookup in ``CAPABILITY_LEVEL_MAP``;
    any value not present there falls back to ``"basis"``.
    """
    key = level_str.strip()
    if key in CAPABILITY_LEVEL_MAP:
        return CAPABILITY_LEVEL_MAP[key]
    return "basis"
def parse_wiki_relevance_level(raw: str | None) -> Optional[int]:
"""
RelevanzLevel aus Wiki (typisch 1–3). Erlaubt Zahl oder Text mit Ziffer z.B. "Level_2".
"""
if raw is None:
return None
s = str(raw).strip()
if not s:
return None
digits = re.findall(r"\d+", s)
if not digits:
return None
try:
n = int(digits[0])
except ValueError:
return None
if n < 1 or n > 3:
return None
return n
# ------------------------------------------------------------------ #
# SMW-Property-Label → Mapper-Zielfeld (Werte wie in EXERCISE_PROPERTY_MAP) #
# browse_subject liefert Anzeigenamen, nicht zwingend interne Property-IDs. #
# ------------------------------------------------------------------ #
def _norm_prop_synonym(name: str) -> str:
    """Normalize a property label into a lookup key for the synonym tables.

    The label is stripped and lower-cased, German umlauts and ß are
    transliterated (ä→ae, ö→oe, ü→ue, ß→ss), and every character that is
    not alphanumeric is removed. A falsy *name* yields ``""``.
    """
    transliteration = str.maketrans({"ä": "ae", "ö": "oe", "ü": "ue", "ß": "ss"})
    lowered = (name or "").strip().lower()
    return "".join(filter(str.isalnum, lowered.translate(transliteration)))
# Alternative SMW display labels → target field (same targets as SKILL_PROPERTY_MAP).
# Keys are compared against the output of _norm_prop_synonym().
SKILL_PROPERTY_SYNONYM_TO_TARGET: dict[str, str] = {
    "karaterelevanz": "karate_relevance",
    "karatebezug": "karate_relevance",
    "relevanzlevel": "relevance_level",
    "wikirelevanz": "karate_relevance",
}
def _skill_property_target(prop_name: str) -> str | None:
    """Resolve the target field for a skill SMW property.

    An exact match in ``SKILL_PROPERTY_MAP`` wins; otherwise the normalized
    label is looked up in ``SKILL_PROPERTY_SYNONYM_TO_TARGET``. Returns
    ``None`` when neither table knows the property.
    """
    try:
        return SKILL_PROPERTY_MAP[prop_name]
    except KeyError:
        pass
    normalized = _norm_prop_synonym(prop_name)
    return SKILL_PROPERTY_SYNONYM_TO_TARGET.get(normalized)
# Alternative labels → target field name (same strings as the values in EXERCISE_PROPERTY_MAP).
# Keys are compared against the output of _norm_prop_synonym().
EXERCISE_PROPERTY_SYNONYM_TO_TARGET: dict[str, str] = {
    "primarycapability": "skill_names",
    "hauptfaehigkeit": "skill_names",
    "primaerefaehigkeit": "skill_names",
    # NOTE(review): the next two look like spellings with the umlaut dropped
    # or ASCII-folded – confirm against real wiki labels.
    "hauptfhigkeit": "skill_names",
    "hauptfahigkeit": "skill_names",
    "capabilitylevel": "skill_levels_raw",
    "faehigkeitsstufe": "skill_levels_raw",
    "faehigkeitslevel": "skill_levels_raw",
    "capabilitystufe": "skill_levels_raw",
    "stilrichtung": "style_names",
    "trainingsstilrichtung": "style_names",
}
def _exercise_property_target(prop_name: str) -> str | None:
    """Resolve the target field for an exercise SMW property.

    Lookup order:
      1. exact name in ``EXERCISE_PROPERTY_MAP``
      2. normalized label in ``EXERCISE_PROPERTY_SYNONYM_TO_TARGET``
      3. substring heuristic on the lower-cased name for capability labels

    Returns ``None`` when the property is unknown.
    """
    direct = EXERCISE_PROPERTY_MAP.get(prop_name)
    if direct is not None:
        return direct

    via_synonym = EXERCISE_PROPERTY_SYNONYM_TO_TARGET.get(_norm_prop_synonym(prop_name))
    if via_synonym is not None:
        return via_synonym

    lowered = (prop_name or "").lower()
    if "capab" not in lowered:
        return None
    # Any capability label that mentions "level" carries the level values;
    # one that mentions "primary" (and no level) carries the skill names.
    if "level" in lowered:
        return "skill_levels_raw"
    if "primary" in lowered:
        return "skill_names"
    return None
# ------------------------------------------------------------------ #
# Haupt-Mapping-Funktion #
# ------------------------------------------------------------------ #
def map_wiki_to_exercise(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """Convert the SMW properties of a wiki page into an exercise dict.

    Args:
        page_title: Title of the wiki page; used as ``title`` unless a
            title-override property is present, and as ``import_id``.
        wiki_page_id: Internal MediaWiki page id.
        smw_props: ``{property_name: [value, ...]}`` as delivered by
            ``SmwClient.browse_subject()``. A non-list value is treated as
            a single value.

    Returns:
        Dict with the mapped fields plus catalog name lists that are
        resolved to ids later (the name → id lookup happens in the router).
    """
    # Targets whose first value is converted from wikitext to plain text.
    plaintext_targets = ("goal", "execution", "summary", "trainer_notes")
    # Targets whose values become a list of labels: target → output key.
    label_list_keys = {
        "keywords_raw": "keywords",  # kept for a later tag implementation
        "focus_area_names": "focus_area_names",
        "target_group_names": "target_group_names",
        "age_group_names": "age_group_names",
        "method_names": "method_names",
        "style_names": "style_names",
        "skill_names": "skill_names",
    }

    exercise: dict = {
        "title": page_title,
        "wiki_page_id": wiki_page_id,
        # Tracking
        "import_source": "mediawiki",
        "import_id": page_title,
        # Defaults
        "visibility": "private",
        "status": "draft",
        # Catalog references (name → id lookup happens in the router)
        "focus_area_names": [],
        "target_group_names": [],
        "age_group_names": [],
        "skill_names": [],
        "skill_levels_raw": [],  # integer strings, e.g. ["3", "2"]
        "style_names": [],
        "method_names": [],
        # Equipment
        "equipment": [],
        # Warnings for unknown catalog values
        "warnings": [],
    }

    for label, raw in smw_props.items():
        if not raw:
            continue
        field = _exercise_property_target(label)
        if not field:
            continue

        # Single-value targets use the head, list targets use every value.
        is_list = isinstance(raw, list)
        head = raw[0] if is_list else raw
        as_list = raw if is_list else [raw]

        if field in label_list_keys:
            exercise[label_list_keys[field]] = [wiki_name_to_label(item) for item in as_list]
        elif field in plaintext_targets:
            exercise[field] = wikitext_to_plaintext(head)
        elif field == "title_override":
            exercise["title"] = wiki_name_to_label(head)
        elif field == "duration_raw":
            exercise["duration_min"], exercise["duration_max"] = parse_duration(head)
        elif field == "group_size_raw":
            exercise["group_size_min"], exercise["group_size_max"] = parse_group_size(head)
        elif field == "equipment_raw":
            exercise["equipment"] = parse_equipment(as_list)
        elif field == "skill_levels_raw":
            exercise["skill_levels_raw"] = list(as_list)

    return exercise
def build_skill_assignments(mapped: dict) -> list[dict]:
    """Build skill assignments from the mapped skill names and levels.

    The entry at index *i* of ``mapped["skill_levels_raw"]`` belongs to the
    entry at index *i* of ``mapped["skill_names"]``, e.g. levels ``[3, 2]``
    go with ``[Schnellkraft, Schnelligkeitsausdauer]``. A skill without a
    matching level is treated as level ``"1"``.

    Returns:
        One dict per skill name; ``target_level`` holds the level slug
        produced by ``map_capability_level`` (``"basis"`` for a blank level).
    """
    skill_names = mapped.get("skill_names", [])
    raw_levels = mapped.get("skill_levels_raw", [])

    def slug_at(position: int) -> str:
        # Resolve the level slug for the skill at *position*.
        value = raw_levels[position] if position < len(raw_levels) else "1"
        try:
            text = str(value).strip()
        except (TypeError, AttributeError):
            text = "1"
        if not text:
            return "basis"
        return map_capability_level(text)

    return [
        {
            "skill_name": name,
            "target_level": slug_at(position),
            "required_level": None,
            "intensity": "mittel",
            "is_primary": False,
        }
        for position, name in enumerate(skill_names)
    ]
def map_wiki_to_skill(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """Convert the SMW properties of a skill-description page into a skill dict.

    Args:
        page_title: Wiki page title; stored as ``name`` and ``import_id``.
        wiki_page_id: Internal MediaWiki page id.
        smw_props: ``{property_name: [value, ...]}``; only the first value
            of each known property is used.

    Returns:
        Dict with the base fields plus ``karate_relevance``,
        ``relevance_level`` and ``description`` when present. A relevance
        level that cannot be parsed is reported in ``warnings`` instead.
    """
    skill = {
        "name": page_title,
        "wiki_page_id": wiki_page_id,
        "import_source": "mediawiki",
        "import_id": page_title,
        "warnings": [],
    }
    summary: Optional[str] = None

    for label, raw in smw_props.items():
        if not raw:
            continue
        field = _skill_property_target(label)
        if not field:
            continue
        head = raw[0] if isinstance(raw, list) else raw

        if field == "relevance_level":
            level = parse_wiki_relevance_level(head if isinstance(head, str) else str(head))
            if level is not None:
                skill["relevance_level"] = level
            else:
                skill["warnings"].append(f"Unbekanntes RelevanzLevel: {head!r}")
        elif field == "karate_relevance":
            skill["karate_relevance"] = wikitext_to_plaintext(str(head))
        elif field == "description":
            summary = wikitext_to_plaintext(str(head))

    # The description is only set when the summary produced non-empty text.
    if summary:
        skill["description"] = summary
    return skill
def map_wiki_to_method(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """Convert the SMW properties of a method-description page into a method dict.

    Args:
        page_title: Wiki page title; stored as ``name`` and ``import_id``.
        wiki_page_id: Internal MediaWiki page id.
        smw_props: ``{property_name: [value, ...]}``; only exact names from
            ``METHOD_PROPERTY_MAP`` are considered, and only the first value
            of each is used.

    Returns:
        Dict with the base fields plus ``code`` and ``description`` when
        present. The karate relevance is not stored separately; it is
        appended to the description under a "Karate-Relevanz:" heading.
    """
    method = {
        "name": page_title,
        "wiki_page_id": wiki_page_id,
        "import_source": "mediawiki",
        "import_id": page_title,
        "warnings": [],
    }
    parts = []

    for label, raw in smw_props.items():
        if not raw:
            continue
        field = METHOD_PROPERTY_MAP.get(label)
        if not field:
            continue
        head = raw[0] if isinstance(raw, list) else raw

        if field == "code":
            method["code"] = head.strip()
        elif field == "karate_relevance":
            relevance = wikitext_to_plaintext(head)
            parts.append(f"\nKarate-Relevanz: {relevance}")
        elif field == "description":
            # The summary always leads, regardless of property order.
            parts[:0] = [wikitext_to_plaintext(head)]

    if parts:
        method["description"] = "\n".join(parts).strip()
    return method