shinkan-jinkendo/backend/smw_mapper.py
Lars 025b161d2f
Some checks failed
Deploy Development / deploy (push) Successful in 36s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 6s
Test Suite / playwright-tests (push) Failing after 1m56s
feat: enhance exercise mapping and filtering capabilities
- Added support for style direction mappings in the backend, allowing for improved categorization of exercises.
- Introduced a new function to normalize property synonyms, enhancing the mapping of exercise properties.
- Updated the exercise catalog assignment logic to include style directions, ensuring proper database entries.
- Enhanced the ExercisesListPage with new filtering options for style directions, improving user experience and search capabilities.
2026-04-28 07:25:33 +02:00

420 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Semantic MediaWiki → Shinkan Field Mapper
Wandelt SMW-Properties von karatetrainer.net in lokale DB-Felder um.
Property-Namen wurden via discover_properties() auf echten Wiki-Seiten ermittelt.
Entdeckte Kategorien:
Übungen: Kategorie: Übungen (auch "Übungen Karate", "Übungen allgemein")
Fähigkeiten: Fähigkeitsbeschreibung
Methoden: Methodenbeschreibung
"""
import re
import logging
from typing import Optional
logger = logging.getLogger(__name__)
# ------------------------------------------------------------------ #
# CapabilityLevel Integer → benannte Stufen #
# ------------------------------------------------------------------ #
# Mapping: SMW integer level (as a string key) → Shinkan level slug.
# Looked up by map_capability_level(), which falls back to "basis" for unknown keys.
CAPABILITY_LEVEL_MAP = {
"1": "basis",
"2": "grundlagen",
"3": "aufbau",
"4": "fortgeschritten",
"5": "optimierung",
}
# ------------------------------------------------------------------ #
# SMW Property → lokales Feld #
# Echte Namen von karatetrainer.net (via discover_properties) #
# ------------------------------------------------------------------ #
# Exercises: SMW property name → mapper target field.
# The target strings are the values dispatched on in map_wiki_to_exercise().
EXERCISE_PROPERTY_MAP = {
# Core fields
"Übungsbezeichnung": "title_override", # Exercise name (preferred over the page title)
"Ziel": "goal",
"Durchführung": "execution",
"Summary": "summary",
"Hinweise": "trainer_notes",
"Plandauer": "duration_raw", # Number of minutes, e.g. "10"
"Gruppengröße": "group_size_raw", # Number, e.g. "2"
"Hilfsmittel": "equipment_raw", # Comma-separated list / single value
"Schlüsselworte": "keywords_raw", # Keywords (not stored directly in the DB; kept for later tags)
# Catalog fields (name → ID lookup)
"Übungstyp": "focus_area_names", # "Karate" → focus_area
"Zielgruppe": "target_group_names",
"Altersgruppe": "age_group_names",
"Trainingsmethode": "method_names", # Wiki page name, e.g. "Plyometrisches_Training"
"Stilrichtung": "style_names", # e.g. Shotokan; see EXERCISE_PROPERTY_SYNONYM_TO_TARGET
# Skills (as names + level)
"PrimaryCapability": "skill_names", # Skill names (may be several)
"CapabilityLevel": "skill_levels_raw", # Integer levels ["3", "2"] → aufbau, grundlagen
# Further fields (optional); map_wiki_to_exercise() has no branch for these targets yet
"Graduierung": "graduierung", # "0 - Anfänger" (future use)
"Lernstufe": "lernstufe", # "Lernstufe_1_-_Erlernen_und_Festigen"
}
# Skills (wiki category: Fähigkeitsbeschreibung): SMW property name → mapper target field.
SKILL_PROPERTY_MAP = {
"Summary": "description",
"KarateRelevanz": "karate_relevance", # Appended to the description by map_wiki_to_skill()
"RelevanzLevel": "relevance_level", # 1-3, not stored directly in the skills DB; map_wiki_to_skill() has no branch for it
}
# Training methods (wiki category: Methodenbeschreibung): SMW property name → mapper target field.
METHOD_PROPERTY_MAP = {
"Summary": "description",
"Kurzbezeichnung": "code", # Abbreviation, e.g. "DM"
"KarateRelevanz": "karate_relevance",
"PrimaryCapability": "skill_names", # Linked skills; map_wiki_to_method() has no branch for this target yet
}
# ------------------------------------------------------------------ #
# Wikitext → Plaintext #
# ------------------------------------------------------------------ #
def wikitext_to_plaintext(wikitext: Optional[str]) -> str:
    """Strip common wikitext markup and return readable plain text.

    Handles external and internal links, templates, bold/italic quotes,
    HTML tags (``<br>`` becomes a newline), headings and list markers.

    Args:
        wikitext: Raw wikitext. ``None`` yields an empty string; any other
            non-string value (e.g. a number delivered by SMW) is converted
            with ``str()`` first instead of raising inside ``re.sub``.

    Returns:
        The cleaned text with surrounding whitespace removed.
    """
    if wikitext is None:
        return ""
    text = wikitext if isinstance(wikitext, str) else str(wikitext)

    # External links: [https://example.com Text] → Text
    text = re.sub(r'\[https?://\S+\s+([^\]]+)\]', r'\1', text)
    # Internal links with alias: [[Link|Text]] → Text
    text = re.sub(r'\[\[([^|\]]+)\|([^\]]+)\]\]', r'\2', text)
    # Internal links without alias: [[Link]] → Link (underscores → spaces)
    text = re.sub(r'\[\[([^\]]+)\]\]', lambda m: m.group(1).replace('_', ' '), text)
    # Drop templates (non-nested only)
    text = re.sub(r'\{\{[^}]+\}\}', '', text)
    # Bold before italic, so ''' is not consumed as '' plus a stray quote
    text = re.sub(r"'''(.+?)'''", r'\1', text)
    text = re.sub(r"''(.+?)''", r'\1', text)
    # <br> becomes a line break; every other HTML tag is removed
    text = re.sub(r'<br\s*/?>', '\n', text, flags=re.IGNORECASE)
    text = re.sub(r'<[^>]+>', '', text)
    # Headings: == Title == → Title on its own line
    text = re.sub(r'={2,6}\s*(.+?)\s*={2,6}', r'\n\1\n', text)
    # Normalise list markers. Only spaces/tabs are consumed after the marker:
    # with \s* an empty bullet line would swallow the newline and merge with
    # the following line.
    text = re.sub(r'^[*#:;]+[ \t]*', '- ', text, flags=re.MULTILINE)
    # Collapse runs of blank lines
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text.strip()
def wiki_name_to_label(wiki_name: str) -> str:
    """Turn a wiki page name into a readable label.

    Underscores become spaces and surrounding whitespace is removed,
    e.g. ``Plyometrisches_Training`` → ``Plyometrisches Training``.
    """
    spaced = " ".join(wiki_name.split("_"))
    return spaced.strip()
# ------------------------------------------------------------------ #
# Parsing-Hilfsfunktionen #
# ------------------------------------------------------------------ #
def parse_duration(raw: str) -> tuple[Optional[int], Optional[int]]:
    """Parse a planned duration into a ``(min, max)`` pair of minutes.

    Examples:
        "10"    → (10, 10)   (a single number is both minimum and maximum)
        "10-15" → (10, 15)
        "15-10" → (10, 15)   (a reversed range is put in order)

    Args:
        raw: Duration text. Numeric values are accepted as well and are
            converted with ``str()`` before the digit search.

    Returns:
        ``(None, None)`` when ``raw`` is ``None`` or contains no digits;
        otherwise the first one or two integers found, ordered ascending.
    """
    if raw is None:
        return None, None
    numbers = re.findall(r'\d+', str(raw))
    if not numbers:
        return None, None
    low = int(numbers[0])
    if len(numbers) == 1:
        return low, low
    high = int(numbers[1])
    # Never hand back min > max to the caller.
    return (low, high) if low <= high else (high, low)
def parse_group_size(raw: str) -> tuple[Optional[int], Optional[int]]:
    """Parse a group size into a ``(min, max)`` pair.

    Examples:
        "2"   → (2, None)   (a single number is a minimum without a maximum)
        "2-4" → (2, 4)
        "4-2" → (2, 4)      (a reversed range is put in order)

    Args:
        raw: Group size text. Numeric values are accepted as well and are
            converted with ``str()`` before the digit search.

    Returns:
        ``(None, None)`` when ``raw`` is ``None`` or contains no digits;
        otherwise the minimum and, if a second number is present, the maximum.
    """
    if raw is None:
        return None, None
    numbers = re.findall(r'\d+', str(raw))
    if not numbers:
        return None, None
    low = int(numbers[0])
    if len(numbers) == 1:
        # Minimum only; no maximum was given.
        return low, None
    high = int(numbers[1])
    return (low, high) if low <= high else (high, low)
def parse_equipment(raw: list[str]) -> list[str]:
    """Normalise an equipment value list into clean labels.

    Every item is split on ``,``, ``;`` and ``/``; each part is converted
    with ``wiki_name_to_label`` and empty results are dropped.

    Args:
        raw: List of raw equipment values. A bare string is treated as a
            one-element list (instead of being iterated character by
            character) and ``None`` yields an empty list.

    Returns:
        The cleaned labels in input order.
    """
    if raw is None:
        return []
    items = [raw] if isinstance(raw, str) else raw
    result = []
    for item in items:
        if item is None:
            continue
        # str() guards against non-string values, which re.split would reject.
        for part in re.split(r'[,;/]', str(item)):
            cleaned = wiki_name_to_label(part.strip())
            if cleaned:
                result.append(cleaned)
    return result
def map_capability_level(level_str: str) -> str:
    """Translate an SMW capability level into its canonical level slug.

    Example: "3" → "aufbau".

    Args:
        level_str: Level as found in the wiki data. Integers are accepted
            as well; the value is converted with ``str()`` and stripped
            before the lookup in ``CAPABILITY_LEVEL_MAP``.

    Returns:
        The matching slug, or "basis" when the level is ``None`` or not a
        key of ``CAPABILITY_LEVEL_MAP``.
    """
    key = "" if level_str is None else str(level_str).strip()
    return CAPABILITY_LEVEL_MAP.get(key, "basis")
# ------------------------------------------------------------------ #
# SMW-Property-Label → Mapper-Zielfeld (Werte wie in EXERCISE_PROPERTY_MAP) #
# browse_subject liefert Anzeigenamen, nicht zwingend interne Property-IDs. #
# ------------------------------------------------------------------ #
def _norm_prop_synonym(name: str) -> str:
s = (name or "").strip().lower()
for a, b in (("ä", "ae"), ("ö", "oe"), ("ü", "ue"), ("ß", "ss")):
s = s.replace(a, b)
return "".join(c for c in s if c.isalnum())
# Alternative property labels → target field name (same strings as the values
# of EXERCISE_PROPERTY_MAP). Keys are compared against the output of
# _norm_prop_synonym(), i.e. lowercase and alphanumeric only.
EXERCISE_PROPERTY_SYNONYM_TO_TARGET: dict[str, str] = {
"primarycapability": "skill_names",
"hauptfaehigkeit": "skill_names",
"primaerefaehigkeit": "skill_names",
# NOTE(review): the next two presumably cover labels whose umlaut was dropped or ASCII-folded — confirm
"hauptfhigkeit": "skill_names",
"hauptfahigkeit": "skill_names",
"capabilitylevel": "skill_levels_raw",
"faehigkeitsstufe": "skill_levels_raw",
"faehigkeitslevel": "skill_levels_raw",
"capabilitystufe": "skill_levels_raw",
"stilrichtung": "style_names",
"trainingsstilrichtung": "style_names",
}
def _exercise_property_target(prop_name: str) -> str | None:
    """Resolve the mapper target field for an SMW property name.

    Lookup order: exact key in ``EXERCISE_PROPERTY_MAP``, then the normalised
    label in ``EXERCISE_PROPERTY_SYNONYM_TO_TARGET``, then a substring
    heuristic for capability properties. Returns ``None`` for unknown names.
    """
    # 1) Exact property name.
    try:
        return EXERCISE_PROPERTY_MAP[prop_name]
    except KeyError:
        pass

    # 2) Known alternative label, compared in normalised form.
    try:
        return EXERCISE_PROPERTY_SYNONYM_TO_TARGET[_norm_prop_synonym(prop_name)]
    except KeyError:
        pass

    # 3) Heuristic: any "...capab..." name is either a level or a primary skill.
    lowered = (prop_name or "").lower()
    if "capab" not in lowered:
        return None
    if "level" in lowered:
        return "skill_levels_raw"
    return "skill_names" if "primary" in lowered else None
# ------------------------------------------------------------------ #
# Haupt-Mapping-Funktion #
# ------------------------------------------------------------------ #
def map_wiki_to_exercise(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """Convert the SMW properties of a wiki page into an exercise dict.

    Args:
        page_title: Title of the wiki page; used as ``title`` unless a
            non-blank title override property is present, and as ``import_id``.
        wiki_page_id: Internal MediaWiki page id.
        smw_props: ``{property_name: [value, ...]}``; a scalar value is
            treated like a one-element list.

    Returns:
        Dict with the mapped fields plus the catalog name lists
        (``*_names``) that still need an ID lookup by the caller.
        ``duration_*``, ``group_size_*`` and ``keywords`` are only present
        when the corresponding property was found.
    """
    mapped: dict = {
        "title": page_title,
        "wiki_page_id": wiki_page_id,
        # Tracking
        "import_source": "mediawiki",
        "import_id": page_title,
        # Defaults
        "visibility": "private",
        "status": "draft",
        # Catalog references (names; ID lookup happens outside this function)
        "focus_area_names": [],
        "target_group_names": [],
        "age_group_names": [],
        "skill_names": [],
        "skill_levels_raw": [],  # integer strings, e.g. ["3", "2"]
        "style_names": [],
        "method_names": [],
        # Equipment
        "equipment": [],
        # Initialised empty; this function never appends to it
        "warnings": [],
    }

    # Targets whose first value is stored as cleaned plain text.
    text_targets = ("goal", "execution", "summary", "trainer_notes")
    # Targets whose values are all stored as labels under the same key.
    label_list_targets = (
        "focus_area_names",
        "target_group_names",
        "age_group_names",
        "method_names",
        "style_names",
        "skill_names",
    )

    for prop_name, values in smw_props.items():
        if not values:
            continue
        target = _exercise_property_target(prop_name)
        if not target:
            continue

        # Normalise once: scalars become a one-element list.
        value_list = list(values) if isinstance(values, list) else [values]
        first_value = value_list[0]

        if target == "title_override":
            title = wiki_name_to_label(first_value)
            # A blank override must not erase the page-title fallback.
            if title:
                mapped["title"] = title
        elif target in text_targets:
            mapped[target] = wikitext_to_plaintext(first_value)
        elif target == "duration_raw":
            mapped["duration_min"], mapped["duration_max"] = parse_duration(first_value)
        elif target == "group_size_raw":
            mapped["group_size_min"], mapped["group_size_max"] = parse_group_size(first_value)
        elif target == "equipment_raw":
            mapped["equipment"] = parse_equipment(value_list)
        elif target == "keywords_raw":
            # Kept for a later tag implementation.
            mapped["keywords"] = [wiki_name_to_label(v) for v in value_list]
        elif target in label_list_targets:
            mapped[target] = [wiki_name_to_label(v) for v in value_list]
        elif target == "skill_levels_raw":
            mapped["skill_levels_raw"] = value_list
        # Any other target (e.g. "graduierung", "lernstufe") is ignored here.

    return mapped
def build_skill_assignments(mapped: dict) -> list[dict]:
    """Build skill assignments from ``skill_names`` and ``skill_levels_raw``.

    Levels are matched to skills by position, e.g. levels ``[3, 2]`` belong to
    skills ``[Schnellkraft, Schnelligkeitsausdauer]``. Each level is turned
    into its canonical slug via ``map_capability_level``; a skill without a
    matching level entry gets level "1", and a blank level gets "basis".

    Args:
        mapped: Exercise dict; missing or ``None``-valued ``skill_names`` /
            ``skill_levels_raw`` entries are treated as empty lists.

    Returns:
        One dict per skill with ``skill_name``, ``target_level``,
        ``required_level`` (None), ``intensity`` (None) and ``is_primary``
        (True for the first skill only).
    """
    skills = mapped.get("skill_names") or []
    levels = mapped.get("skill_levels_raw") or []
    assignments = []
    for idx, skill_name in enumerate(skills):
        # str() accepts integer levels as well as strings.
        raw = str(levels[idx]).strip() if idx < len(levels) else "1"
        assignments.append({
            "skill_name": skill_name,
            "target_level": map_capability_level(raw) if raw else "basis",
            "required_level": None,
            "intensity": None,
            "is_primary": idx == 0,
        })
    return assignments
def map_wiki_to_skill(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """Convert the SMW properties of a skill description page into a skill dict.

    The description text comes first, followed by an optional
    "Karate-Relevanz" paragraph; ``description`` is only set when at least
    one of the two was found.
    """
    result = dict(
        name=page_title,
        wiki_page_id=wiki_page_id,
        import_source="mediawiki",
        import_id=page_title,
        warnings=[],
    )
    parts = []
    for prop_name, values in smw_props.items():
        target = SKILL_PROPERTY_MAP.get(prop_name) if values else None
        if not target:
            continue
        head = values[0] if isinstance(values, list) else values
        if target == "description":
            # The summary always leads, regardless of property order.
            parts[:0] = [wikitext_to_plaintext(head)]
        elif target == "karate_relevance":
            relevance = wikitext_to_plaintext(head)
            parts.append(f"\nKarate-Relevanz: {relevance}")
        # NOTE(review): the "relevance_level" target has no branch and is dropped.
    if parts:
        result["description"] = "\n".join(parts).strip()
    return result
def map_wiki_to_method(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """Convert the SMW properties of a method description page into a method dict.

    The description text comes first, followed by an optional
    "Karate-Relevanz" paragraph; ``description`` and ``code`` are only set
    when the corresponding property was found.
    """
    result = dict(
        name=page_title,
        wiki_page_id=wiki_page_id,
        import_source="mediawiki",
        import_id=page_title,
        warnings=[],
    )
    parts = []
    for prop_name, values in smw_props.items():
        target = METHOD_PROPERTY_MAP.get(prop_name) if values else None
        if not target:
            continue
        head = values[0] if isinstance(values, list) else values
        if target == "code":
            result["code"] = head.strip()
        elif target == "description":
            # The summary always leads, regardless of property order.
            parts[:0] = [wikitext_to_plaintext(head)]
        elif target == "karate_relevance":
            relevance = wikitext_to_plaintext(head)
            parts.append(f"\nKarate-Relevanz: {relevance}")
        # NOTE(review): the "skill_names" target has no branch and is dropped.
    if parts:
        result["description"] = "\n".join(parts).strip()
    return result