shinkan-jinkendo/backend/smw_mapper.py
Lars 76098f5244
Some checks failed
Deploy Development / deploy (push) Successful in 36s
Test Suite / lint-backend (push) Successful in 1s
Test Suite / build-frontend (push) Successful in 5s
Test Suite / playwright-tests (push) Failing after 1m55s
feat: update capability levels and enhance exercise filtering
- Updated capability level mappings in the backend to reflect new terminology (e.g., "einsteiger" to "basis" and "experte" to "optimierung").
- Refactored the exercise management logic to normalize skill levels using canonical slugs, improving consistency across the application.
- Enhanced the ExercisesListPage with additional filtering options for style direction, training type, and target group, along with AI search capabilities.
- Incremented application version to 0.7.7 and updated changelog to document these changes.
2026-04-27 18:25:23 +02:00

372 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Semantic MediaWiki → Shinkan Field Mapper
Wandelt SMW-Properties von karatetrainer.net in lokale DB-Felder um.
Property-Namen wurden via discover_properties() auf echten Wiki-Seiten ermittelt.
Entdeckte Kategorien:
Übungen: Kategorie: Übungen (auch "Übungen Karate", "Übungen allgemein")
Fähigkeiten: Fähigkeitsbeschreibung
Methoden: Methodenbeschreibung
"""
import re
import logging
from typing import Optional
logger = logging.getLogger(__name__)
# ------------------------------------------------------------------ #
# CapabilityLevel Integer → benannte Stufen #
# ------------------------------------------------------------------ #
# Mapping: SMW-Integer → Shinkan-Stufenname
# SMW integer level (as a string key) → Shinkan level slug.
# The slugs are listed in ascending level order; level numbering starts at 1.
CAPABILITY_LEVEL_MAP = {
    str(level): slug
    for level, slug in enumerate(
        ("basis", "grundlagen", "aufbau", "fortgeschritten", "optimierung"),
        start=1,
    )
}
# ------------------------------------------------------------------ #
# SMW Property → lokales Feld #
# Echte Namen von karatetrainer.net (via discover_properties) #
# ------------------------------------------------------------------ #
# Übungen (exercises)
EXERCISE_PROPERTY_MAP = {
    # Maps an SMW property name (key) to the internal target name (value)
    # that map_wiki_to_exercise() dispatches on.
    # Core fields
    "Übungsbezeichnung": "title_override",  # exercise name (preferred over the page title)
    "Ziel": "goal",
    "Durchführung": "execution",
    "Summary": "summary",
    "Hinweise": "trainer_notes",
    "Plandauer": "duration_raw",  # number of minutes, e.g. "10"
    "Gruppengröße": "group_size_raw",  # number, e.g. "2"
    "Hilfsmittel": "equipment_raw",  # comma-separated list / single value
    "Schlüsselworte": "keywords_raw",  # keywords (not stored directly in the DB; kept for later tags)
    # Catalog fields (name → ID lookup)
    "Übungstyp": "focus_area_names",  # "Karate" → focus_area
    "Zielgruppe": "target_group_names",
    "Altersgruppe": "age_group_names",
    "Trainingsmethode": "method_names",  # wiki page name, e.g. "Plyometrisches_Training"
    # Skills (as names + level)
    "PrimaryCapability": "skill_names",  # skill names (there may be several)
    "CapabilityLevel": "skill_levels_raw",  # integer levels ["3", "2"] → aufbau, grundlagen
    # Further fields (optional)
    "Graduierung": "graduierung",  # "0 - Anfänger" (future use)
    "Lernstufe": "lernstufe",  # "Lernstufe_1_-_Erlernen_und_Festigen"
}
# Fähigkeiten (skills) Kategorie: Fähigkeitsbeschreibung
SKILL_PROPERTY_MAP = {
    # Maps an SMW property name (key) to the internal target name (value)
    # that map_wiki_to_skill() dispatches on.
    "Summary": "description",
    "KarateRelevanz": "karate_relevance",  # appended to the description
    "RelevanzLevel": "relevance_level",  # 1-3, not stored directly in the skills DB
}
# Trainingsmethoden Kategorie: Methodenbeschreibung
METHOD_PROPERTY_MAP = {
    # Maps an SMW property name (key) to the internal target name (value)
    # that map_wiki_to_method() dispatches on.
    "Summary": "description",
    "Kurzbezeichnung": "code",  # abbreviation, e.g. "DM"
    "KarateRelevanz": "karate_relevance",
    "PrimaryCapability": "skill_names",  # linked skills
}
# ------------------------------------------------------------------ #
# Wikitext → Plaintext #
# ------------------------------------------------------------------ #
def wikitext_to_plaintext(wikitext: str) -> str:
    """
    Strip wikitext formatting and return readable plain text.

    Handles external and internal links, templates, bold/italic quotes,
    HTML tags (``<br>`` becomes a newline), section headings and list
    markers. Runs of three or more newlines are collapsed to one blank line
    and the result is stripped.

    Args:
        wikitext: Raw wikitext. ``None`` or an empty value yields ``""``;
            any other non-string value is converted with ``str()`` first.

    Returns:
        The plain-text rendering of ``wikitext``.
    """
    # Missing values must not crash the import run inside re.sub().
    if not wikitext:
        return ""
    text = str(wikitext)

    # External links: [https://example.com Text] → Text
    text = re.sub(r'\[https?://\S+\s+([^\]]+)\]', r'\1', text)
    # Internal links with alias: [[Link|Text]] → Text
    text = re.sub(r'\[\[([^|\]]+)\|([^\]]+)\]\]', r'\2', text)
    # Internal links without alias: [[Link]] → Link (underscores → spaces)
    text = re.sub(r'\[\[([^\]]+)\]\]', lambda m: m.group(1).replace('_', ' '), text)
    # Drop templates (non-nested): {{...}}
    text = re.sub(r'\{\{[^}]+\}\}', '', text)
    # Bold and italic; bold first so ''' is not consumed as '' + '
    text = re.sub(r"'''(.+?)'''", r'\1', text)
    text = re.sub(r"''(.+?)''", r'\1', text)
    # <br> becomes a line break, every other HTML tag is removed
    text = re.sub(r'<br\s*/?>', '\n', text, flags=re.IGNORECASE)
    text = re.sub(r'<[^>]+>', '', text)
    # Headings: only a line consisting entirely of "== Title ==" is a
    # heading. Anchoring to the line keeps inline text such as
    # "a == b und c == d" intact. This runs after the <br> conversion so a
    # heading following a <br> is already on its own line.
    text = re.sub(
        r'^[ \t]*={2,6}[ \t]*(.+?)[ \t]*={2,6}[ \t]*$',
        r'\n\1\n',
        text,
        flags=re.MULTILINE,
    )
    # Normalise list / indent markers at line start to "- "
    text = re.sub(r'^[*#:;]+\s*', '- ', text, flags=re.MULTILINE)
    # Collapse multiple blank lines
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text.strip()
def wiki_name_to_label(wiki_name: str) -> str:
    """
    Turn a wiki page name into a readable label.

    Every underscore becomes a space and surrounding whitespace is removed,
    e.g. "Plyometrisches_Training" → "Plyometrisches Training".
    """
    spaced = " ".join(wiki_name.split("_"))
    return spaced.strip()
# ------------------------------------------------------------------ #
# Parsing-Hilfsfunktionen #
# ------------------------------------------------------------------ #
def parse_duration(raw: str) -> tuple[Optional[int], Optional[int]]:
    """
    Extract a (min, max) duration pair from a raw property value.

    A single number is used for both ends; if several numbers occur, the
    first two are returned in the order they appear in the text.

        "10"    → (10, 10)
        "10-15" → (10, 15)

    Returns (None, None) for an empty value or a value without digits.
    """
    digit_runs = re.findall(r'\d+', raw) if raw else []
    if not digit_runs:
        return None, None
    lower = int(digit_runs[0])
    # No second number → the single value is both minimum and maximum.
    upper = int(digit_runs[1]) if len(digit_runs) > 1 else lower
    return lower, upper
def parse_group_size(raw: str) -> tuple[Optional[int], Optional[int]]:
    """
    Extract a (min, max) group size pair from a raw property value.

    A single number is treated as the minimum with no maximum given; if
    several numbers occur, the first two are returned in textual order.

        "2"   → (2, None)
        "2-4" → (2, 4)

    Returns (None, None) for an empty value or a value without digits.
    """
    if not raw:
        return None, None
    leading = re.findall(r'\d+', raw)[:2]
    if not leading:
        return None, None
    smallest = int(leading[0])
    if len(leading) == 2:
        return smallest, int(leading[1])
    # Only one number present: minimum known, maximum left open.
    return smallest, None
def parse_equipment(raw: list[str]) -> list[str]:
    """
    Normalise a list of equipment values into clean labels.

    Each entry is split on ",", ";" and "/"; every piece is trimmed and
    passed through wiki_name_to_label(). Pieces that end up empty are
    dropped. The order of the input is preserved.
    """
    labels = (
        wiki_name_to_label(piece.strip())
        for entry in raw
        for piece in re.split(r'[,;/]', entry)
    )
    return [label for label in labels if label]
def map_capability_level(level_str: str) -> str:
    """
    Translate an integer level string into its canonical level slug.

    Example: "3" → "aufbau". Surrounding whitespace is ignored; a value that
    is not a key of CAPABILITY_LEVEL_MAP falls back to "basis".
    """
    key = level_str.strip()
    if key in CAPABILITY_LEVEL_MAP:
        return CAPABILITY_LEVEL_MAP[key]
    return "basis"
# ------------------------------------------------------------------ #
# Haupt-Mapping-Funktion #
# ------------------------------------------------------------------ #
def map_wiki_to_exercise(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """
    Convert the SMW properties of a wiki page into an exercise dict.

    Properties are looked up in EXERCISE_PROPERTY_MAP. Properties that are
    unknown, have an empty value, or map to a target without a branch below
    (currently "graduierung" and "lernstufe") are skipped.

    Args:
        page_title: Title of the wiki page. Used as the exercise title
            unless a non-empty "Übungsbezeichnung" property overrides it.
        wiki_page_id: Internal MediaWiki page ID.
        smw_props: {property_name: [value, ...]} (documented by the original
            author as coming from SmwClient.browse_subject()). A bare,
            non-list value is treated as a one-element list.

    Returns:
        Dict with the mapped fields plus the catalog name lists that are
        resolved to IDs later (in the router, per the original note).
    """
    mapped: dict = {
        "title": page_title,
        "wiki_page_id": wiki_page_id,
        # Tracking
        "import_source": "mediawiki",
        "import_id": page_title,
        # Defaults
        "visibility": "private",
        "status": "draft",
        # Catalog references (names only; ID lookup happens elsewhere)
        "focus_area_names": [],
        "target_group_names": [],
        "age_group_names": [],
        "skill_names": [],
        "skill_levels_raw": [],  # integer strings, e.g. ["3", "2"]
        "method_names": [],
        # Equipment
        "equipment": [],
        # Warnings for unknown catalog values (not filled in this function)
        "warnings": [],
    }

    # Targets stored as wikitext converted to plain text (first value only).
    plaintext_targets = ("goal", "execution", "summary", "trainer_notes")
    # Targets whose values are all turned into readable labels, mapped to
    # the key of the result dict they are stored under.
    label_list_keys = {
        "keywords_raw": "keywords",  # kept for a later tag implementation
        "focus_area_names": "focus_area_names",
        "target_group_names": "target_group_names",
        "age_group_names": "age_group_names",
        "method_names": "method_names",
        "skill_names": "skill_names",
    }

    for prop_name, values in smw_props.items():
        if not values:
            continue
        target = EXERCISE_PROPERTY_MAP.get(prop_name)
        if not target:
            continue

        # Normalise once: every branch works on a list and/or its first item.
        value_list = list(values) if isinstance(values, list) else [values]
        first_value = value_list[0]

        if target == "title_override":
            override = wiki_name_to_label(first_value)
            # A blank override must not wipe out the page-title fallback.
            if override:
                mapped["title"] = override
        elif target in plaintext_targets:
            mapped[target] = wikitext_to_plaintext(first_value)
        elif target == "duration_raw":
            mapped["duration_min"], mapped["duration_max"] = parse_duration(first_value)
        elif target == "group_size_raw":
            mapped["group_size_min"], mapped["group_size_max"] = parse_group_size(first_value)
        elif target == "equipment_raw":
            mapped["equipment"] = parse_equipment(value_list)
        elif target == "skill_levels_raw":
            # Kept raw; build_skill_assignments() translates them to slugs.
            mapped["skill_levels_raw"] = value_list
        elif target in label_list_keys:
            mapped[label_list_keys[target]] = [
                wiki_name_to_label(value) for value in value_list
            ]
    return mapped
def build_skill_assignments(mapped: dict) -> list[dict]:
    """
    Build skill assignments from the "skill_names" and "skill_levels_raw"
    entries of a mapped exercise dict.

    The level at index i belongs to the skill at index i, e.g. levels
    [3, 2] pair with skills [Schnellkraft, Schnelligkeitsausdauer]. A skill
    without a matching level uses level "1". Each level is translated via
    map_capability_level() into the slug stored as "target_level"; an empty
    level string yields "basis". The first skill is flagged as primary.
    """
    skill_names = mapped.get("skill_names", [])
    raw_levels = mapped.get("skill_levels_raw", [])

    result = []
    for position, name in enumerate(skill_names):
        if position < len(raw_levels):
            candidate = raw_levels[position]
        else:
            candidate = "1"
        try:
            level_key = str(candidate).strip()
        except (TypeError, AttributeError):
            level_key = "1"
        slug = "basis" if not level_key else map_capability_level(level_key)
        result.append(
            dict(
                skill_name=name,
                target_level=slug,
                required_level=None,
                intensity=None,
                is_primary=(position == 0),
            )
        )
    return result
def map_wiki_to_skill(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """
    Convert the SMW properties of a skill description page into a skill dict.

    Only the first value of each property is used. The summary text comes
    first in "description", followed by a "Karate-Relevanz: ..." paragraph
    when that property is present. Properties whose target has no branch
    below (e.g. "relevance_level") are ignored. "description" is only set
    when at least one part was collected.
    """
    result = {
        "name": page_title,
        "wiki_page_id": wiki_page_id,
        "import_source": "mediawiki",
        "import_id": page_title,
        "warnings": [],
    }

    summaries = []
    relevance_notes = []
    for prop_name, values in smw_props.items():
        target = SKILL_PROPERTY_MAP.get(prop_name) if values else None
        if not target:
            continue
        head = values[0] if isinstance(values, list) else values
        if target == "description":
            summaries.append(wikitext_to_plaintext(head))
        elif target == "karate_relevance":
            rel = wikitext_to_plaintext(head)
            relevance_notes.append(f"\nKarate-Relevanz: {rel}")

    # Summaries lead (most recently seen first), relevance notes follow.
    pieces = summaries[::-1] + relevance_notes
    if pieces:
        result["description"] = "\n".join(pieces).strip()
    return result
def map_wiki_to_method(
    page_title: str,
    wiki_page_id: Optional[int],
    smw_props: dict,
) -> dict:
    """
    Convert the SMW properties of a method description page into a method dict.

    Only the first value of each property is used. The summary text comes
    first in "description", followed by a "Karate-Relevanz: ..." paragraph
    when that property is present; the short name is stored stripped under
    "code". Properties whose target has no branch below (e.g. "skill_names")
    are ignored. "description" is only set when at least one part was
    collected.
    """
    result = {
        "name": page_title,
        "wiki_page_id": wiki_page_id,
        "import_source": "mediawiki",
        "import_id": page_title,
        "warnings": [],
    }

    summaries = []
    relevance_notes = []
    for prop_name, values in smw_props.items():
        target = METHOD_PROPERTY_MAP.get(prop_name) if values else None
        if not target:
            continue
        head = values[0] if isinstance(values, list) else values
        if target == "description":
            summaries.append(wikitext_to_plaintext(head))
        elif target == "code":
            result["code"] = head.strip()
        elif target == "karate_relevance":
            rel = wikitext_to_plaintext(head)
            relevance_notes.append(f"\nKarate-Relevanz: {rel}")

    # Summaries lead (most recently seen first), relevance notes follow.
    pieces = summaries[::-1] + relevance_notes
    if pieces:
        result["description"] = "\n".join(pieces).strip()
    return result