""" Semantic MediaWiki → Shinkan Field Mapper Wandelt SMW-Properties von karatetrainer.net in lokale DB-Felder um. Property-Namen wurden via discover_properties() auf echten Wiki-Seiten ermittelt. Entdeckte Kategorien: Übungen: Kategorie: Übungen (auch "Übungen Karate", "Übungen allgemein") Fähigkeiten: Fähigkeitsbeschreibung Methoden: Methodenbeschreibung """ import re import logging from typing import Optional logger = logging.getLogger(__name__) # ------------------------------------------------------------------ # # CapabilityLevel Integer → benannte Stufen # # ------------------------------------------------------------------ # # Mapping: SMW-Integer → Shinkan-Stufenname CAPABILITY_LEVEL_MAP = { "1": "basis", "2": "grundlagen", "3": "aufbau", "4": "fortgeschritten", "5": "optimierung", } # ------------------------------------------------------------------ # # SMW Property → lokales Feld # # Echte Namen von karatetrainer.net (via discover_properties) # # ------------------------------------------------------------------ # # Übungen (exercises) EXERCISE_PROPERTY_MAP = { # Kern-Felder "Übungsbezeichnung": "title_override", # Übungsname (bevorzugt ggü. Seitentitel) "Ziel": "goal", "Durchführung": "execution", "Summary": "summary", "Hinweise": "trainer_notes", "Plandauer": "duration_raw", # Zahl in Minuten z.B. "10" "Gruppengröße": "group_size_raw", # Zahl z.B. "2" "Hilfsmittel": "equipment_raw", # Komma-Liste / einzelner Wert "Schlüsselworte": "keywords_raw", # Keywords (nicht direkt in DB, für spätere Tags) # Katalog-Felder (Name → ID Lookup) "Übungstyp": "focus_area_names", # "Karate" → focus_area "Zielgruppe": "target_group_names", "Altersgruppe": "age_group_names", "Trainingsmethode": "method_names", # Wiki-Seitenname z.B. "Plyometrisches_Training" "Stilrichtung": "style_names", # z. B. Shotokan; siehe EXERCISE_PROPERTY_SYNONYM_TO_TARGET # Fähigkeiten (als Namen + Level) "PrimaryCapability": "skill_names", # Skill-Namen (können mehrere sein) "CapabilityLevel": "skill_levels_raw", # Integer-Levels ["3", "2"] → aufbau, grundlagen # Weitere Felder (optional) "Graduierung": "graduierung", # "0 - Anfänger" (zukünftige Nutzung) "Lernstufe": "lernstufe", # "Lernstufe_1_-_Erlernen_und_Festigen" } # Fähigkeiten (skills) – Kategorie: Fähigkeitsbeschreibung SKILL_PROPERTY_MAP = { "Summary": "description", "KarateRelevanz": "karate_relevance", # Spalte skills.karate_relevance (+ Wiki-Import) "RelevanzLevel": "relevance_level", # Spalte skills.relevance_level 1–3 } # Trainingsmethoden – Kategorie: Methodenbeschreibung METHOD_PROPERTY_MAP = { "Summary": "description", "Kurzbezeichnung": "code", # Abkürzung z.B. "DM" "KarateRelevanz": "karate_relevance", "PrimaryCapability": "skill_names", # Verknüpfte Fähigkeiten } # ------------------------------------------------------------------ # # Wikitext → Plaintext # # ------------------------------------------------------------------ # def wikitext_to_plaintext(wikitext: str) -> str: """Entfernt Wikitext-Formatierungen und gibt lesbaren Plaintext zurück.""" text = wikitext # Externe Links: [https://example.com Text] → Text text = re.sub(r'\[https?://\S+\s+([^\]]+)\]', r'\1', text) # Interne Links mit Alias: [[Link|Text]] → Text text = re.sub(r'\[\[([^|\]]+)\|([^\]]+)\]\]', r'\2', text) # Interne Links ohne Alias: [[Link]] → Link (Unterstriche → Leerzeichen) text = re.sub(r'\[\[([^\]]+)\]\]', lambda m: m.group(1).replace('_', ' '), text) # Templates entfernen (einzeilig) text = re.sub(r'\{\{[^}]+\}\}', '', text) # Fettdruck und Kursiv text = re.sub(r"'''(.+?)'''", r'\1', text) text = re.sub(r"''(.+?)''", r'\1', text) # HTML-Tags entfernen (inkl.
) text = re.sub(r'', '\n', text, flags=re.IGNORECASE) text = re.sub(r'<[^>]+>', '', text) # Überschriften text = re.sub(r'={2,6}\s*(.+?)\s*={2,6}', r'\n\1\n', text) # Aufzählungszeichen normalisieren text = re.sub(r'^[*#:;]+\s*', '- ', text, flags=re.MULTILINE) # Mehrfache Leerzeilen normalisieren text = re.sub(r'\n{3,}', '\n\n', text) return text.strip() def wiki_name_to_label(wiki_name: str) -> str: """Wandelt Wiki-Seitennamen in lesbare Labels um: Plyometrisches_Training → Plyometrisches Training""" return wiki_name.replace('_', ' ').strip() # ------------------------------------------------------------------ # # Parsing-Hilfsfunktionen # # ------------------------------------------------------------------ # def parse_duration(raw: str) -> tuple[Optional[int], Optional[int]]: """ "10" → (10, 10) (Plandauer ist immer eine einzelne Zahl in Minuten) "10-15" → (10, 15) """ if not raw: return None, None numbers = re.findall(r'\d+', raw) if not numbers: return None, None if len(numbers) == 1: val = int(numbers[0]) return val, val return int(numbers[0]), int(numbers[1]) def parse_group_size(raw: str) -> tuple[Optional[int], Optional[int]]: """ "2" → (2, 2) (Gruppengröße ist immer eine einzelne Zahl) """ if not raw: return None, None numbers = re.findall(r'\d+', raw) if not numbers: return None, None val = int(numbers[0]) if len(numbers) == 1: return val, None # Minimum, kein Maximum angegeben return int(numbers[0]), int(numbers[1]) def parse_equipment(raw: list[str]) -> list[str]: """Normalisiert Equipment-Liste: ["Ausdruck"] oder ["Gewicht"] → bereinigt""" result = [] for item in raw: for part in re.split(r'[,;/]', item): cleaned = wiki_name_to_label(part.strip()) if cleaned: result.append(cleaned) return result def map_capability_level(level_str: str) -> str: """Wandelt Integer-Level in kanonischen Stufen-Slug: "3" → "aufbau" """ return CAPABILITY_LEVEL_MAP.get(level_str.strip(), "basis") def parse_wiki_relevance_level(raw: str | None) -> Optional[int]: """ RelevanzLevel aus Wiki (typisch 1–3). Erlaubt Zahl oder Text mit Ziffer z.B. "Level_2". """ if raw is None: return None s = str(raw).strip() if not s: return None digits = re.findall(r"\d+", s) if not digits: return None try: n = int(digits[0]) except ValueError: return None if n < 1 or n > 3: return None return n # ------------------------------------------------------------------ # # SMW-Property-Label → Mapper-Zielfeld (Werte wie in EXERCISE_PROPERTY_MAP) # # browse_subject liefert Anzeigenamen, nicht zwingend interne Property-IDs. # # ------------------------------------------------------------------ # def _norm_prop_synonym(name: str) -> str: s = (name or "").strip().lower() for a, b in (("ä", "ae"), ("ö", "oe"), ("ü", "ue"), ("ß", "ss")): s = s.replace(a, b) return "".join(c for c in s if c.isalnum()) # Alternative SMW-Anzeigelabel → Zielfeld (gleiche Targets wie SKILL_PROPERTY_MAP) SKILL_PROPERTY_SYNONYM_TO_TARGET: dict[str, str] = { "karaterelevanz": "karate_relevance", "karatebezug": "karate_relevance", "relevanzlevel": "relevance_level", "wikirelevanz": "karate_relevance", } def _skill_property_target(prop_name: str) -> str | None: """Ermittelt Zielfeld für eine Skill-SMW-Property.""" if prop_name in SKILL_PROPERTY_MAP: return SKILL_PROPERTY_MAP[prop_name] n = _norm_prop_synonym(prop_name) return SKILL_PROPERTY_SYNONYM_TO_TARGET.get(n) # alternative Labels → Zielfeld-Name (gleiche Strings wie Werte in EXERCISE_PROPERTY_MAP) EXERCISE_PROPERTY_SYNONYM_TO_TARGET: dict[str, str] = { "primarycapability": "skill_names", "hauptfaehigkeit": "skill_names", "primaerefaehigkeit": "skill_names", "hauptfhigkeit": "skill_names", "hauptfahigkeit": "skill_names", "capabilitylevel": "skill_levels_raw", "faehigkeitsstufe": "skill_levels_raw", "faehigkeitslevel": "skill_levels_raw", "capabilitystufe": "skill_levels_raw", "stilrichtung": "style_names", "trainingsstilrichtung": "style_names", } def _exercise_property_target(prop_name: str) -> str | None: """Ermittelt Zielfeld für eine SMW-Property; None = unbekannt.""" if prop_name in EXERCISE_PROPERTY_MAP: return EXERCISE_PROPERTY_MAP[prop_name] n = _norm_prop_synonym(prop_name) if n in EXERCISE_PROPERTY_SYNONYM_TO_TARGET: return EXERCISE_PROPERTY_SYNONYM_TO_TARGET[n] nlow = (prop_name or "").lower() if "primary" in nlow and "capab" in nlow and "level" not in nlow: return "skill_names" if "capab" in nlow and "level" in nlow: return "skill_levels_raw" return None # ------------------------------------------------------------------ # # Haupt-Mapping-Funktion # # ------------------------------------------------------------------ # def map_wiki_to_exercise( page_title: str, wiki_page_id: Optional[int], smw_props: dict, ) -> dict: """ Wandelt SMW-Properties einer Wiki-Seite in ein Exercise-Dict um. Args: page_title: Titel der Wiki-Seite (Fallback für title) wiki_page_id: Interne MediaWiki-Seiten-ID smw_props: {property_name: [value, ...]} aus SmwClient.browse_subject() Returns: Dict mit gemappten Feldern + Katalog-Listen für ID-Lookup. """ mapped: dict = { "title": page_title, "wiki_page_id": wiki_page_id, # Tracking "import_source": "mediawiki", "import_id": page_title, # Defaults "visibility": "private", "status": "draft", # Katalog-Referenzen (Name → ID-Lookup erfolgt im Router) "focus_area_names": [], "target_group_names": [], "age_group_names": [], "skill_names": [], "skill_levels_raw": [], # Integer-Strings ["3", "2"] "style_names": [], "method_names": [], # Equipment "equipment": [], # Warnungen für unbekannte Katalog-Werte "warnings": [], } for prop_name, values in smw_props.items(): if not values: continue target = _exercise_property_target(prop_name) if not target: continue # Ersten Wert oder ganzes Array first_value = values[0] if isinstance(values, list) else values if target == "title_override": mapped["title"] = wiki_name_to_label(first_value) elif target in ("goal", "execution", "summary", "trainer_notes"): mapped[target] = wikitext_to_plaintext(first_value) elif target == "duration_raw": dur_min, dur_max = parse_duration(first_value) mapped["duration_min"] = dur_min mapped["duration_max"] = dur_max elif target == "group_size_raw": gs_min, gs_max = parse_group_size(first_value) mapped["group_size_min"] = gs_min mapped["group_size_max"] = gs_max elif target == "equipment_raw": mapped["equipment"] = parse_equipment(values if isinstance(values, list) else [values]) elif target == "keywords_raw": # Keywords für spätere Tag-Implementierung speichern mapped["keywords"] = [wiki_name_to_label(v) for v in (values if isinstance(values, list) else [values])] elif target == "focus_area_names": mapped["focus_area_names"] = [wiki_name_to_label(v) for v in (values if isinstance(values, list) else [values])] elif target == "target_group_names": mapped["target_group_names"] = [wiki_name_to_label(v) for v in (values if isinstance(values, list) else [values])] elif target == "age_group_names": mapped["age_group_names"] = [wiki_name_to_label(v) for v in (values if isinstance(values, list) else [values])] elif target == "method_names": mapped["method_names"] = [wiki_name_to_label(v) for v in (values if isinstance(values, list) else [values])] elif target == "style_names": mapped["style_names"] = [wiki_name_to_label(v) for v in (values if isinstance(values, list) else [values])] elif target == "skill_names": mapped["skill_names"] = [wiki_name_to_label(v) for v in (values if isinstance(values, list) else [values])] elif target == "skill_levels_raw": mapped["skill_levels_raw"] = list(values) if isinstance(values, list) else [values] return mapped def build_skill_assignments(mapped: dict) -> list[dict]: """ Erstellt Skill-Zuordnungen aus PrimaryCapability + CapabilityLevel. CapabilityLevel [3, 2] korrespondiert mit PrimaryCapability [Schnellkraft, Schnelligkeitsausdauer] → target_level als kanonischer Slug (basis … optimierung), DB VARCHAR. """ skills = mapped.get("skill_names", []) levels = mapped.get("skill_levels_raw", []) assignments = [] for idx, skill_name in enumerate(skills): level_str = levels[idx] if idx < len(levels) else "1" try: raw = str(level_str).strip() except (TypeError, AttributeError): raw = "1" target_slug = map_capability_level(raw) if raw else "basis" assignments.append({ "skill_name": skill_name, "target_level": target_slug, "required_level": None, "intensity": "mittel", "is_primary": False, }) return assignments def map_wiki_to_skill( page_title: str, wiki_page_id: Optional[int], smw_props: dict, ) -> dict: """Wandelt SMW-Properties einer Fähigkeitsbeschreibung-Seite in ein Skill-Dict um.""" mapped = { "name": page_title, "wiki_page_id": wiki_page_id, "import_source": "mediawiki", "import_id": page_title, "warnings": [], } description_text: Optional[str] = None for prop_name, values in smw_props.items(): if not values: continue target = _skill_property_target(prop_name) if not target: continue first_value = values[0] if isinstance(values, list) else values if target == "description": description_text = wikitext_to_plaintext(str(first_value)) elif target == "karate_relevance": mapped["karate_relevance"] = wikitext_to_plaintext(str(first_value)) elif target == "relevance_level": parsed = parse_wiki_relevance_level(first_value if isinstance(first_value, str) else str(first_value)) if parsed is None: mapped["warnings"].append(f"Unbekanntes RelevanzLevel: {first_value!r}") else: mapped["relevance_level"] = parsed if description_text: mapped["description"] = description_text return mapped def map_wiki_to_method( page_title: str, wiki_page_id: Optional[int], smw_props: dict, ) -> dict: """Wandelt SMW-Properties einer Methodenbeschreibung-Seite in ein Method-Dict um.""" mapped = { "name": page_title, "wiki_page_id": wiki_page_id, "import_source": "mediawiki", "import_id": page_title, "warnings": [], } description_parts = [] for prop_name, values in smw_props.items(): if not values: continue target = METHOD_PROPERTY_MAP.get(prop_name) if not target: continue first_value = values[0] if isinstance(values, list) else values if target == "description": description_parts.insert(0, wikitext_to_plaintext(first_value)) elif target == "code": mapped["code"] = first_value.strip() elif target == "karate_relevance": rel = wikitext_to_plaintext(first_value) description_parts.append(f"\nKarate-Relevanz: {rel}") if description_parts: mapped["description"] = "\n".join(description_parts).strip() return mapped