shinkan-jinkendo/backend/scripts/generate_migration_023_direct.py
Lars e8eba57b3a
Some checks failed
Deploy Development / deploy (push) Successful in 36s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 5s
Test Suite / playwright-tests (push) Failing after 1m55s
feat: Migration 022+023 - Vollständiger Skills-Import
- Migration 022: Schema-Erweiterung
  - skill_main_categories (KARATE / ALLGEMEINE)
  - skills.main_category_id + focus_areas JSONB
  - skill_level_definitions Tabelle
  - Indizes für Performance

- Migration 023: Produktionsreifer Skills-Import
  - 69 Skills mit vollständiger Kategorisierung
  - 2 Haupt-Kategorien, 9 Unterkategorien
  - KARATE: 32 Skills (karate focus)
  - ALLGEMEINE: 37 Skills (universal focus)
  - Duplikat-Bereinigung (Timing, Antizipation, etc.)

- Scripts:
  - parse_matrix.py: Extrahiert Skills aus Fähigkeitsmatrix
  - generate_migration_023_direct.py: Generiert Migration direkt aus Matrix

Source: https://karatetrainer.net/index.php?title=Fähigkeitsmatrix

Verified auf Dev: 69 Skills importiert, korrekte Kategorisierung
2026-04-27 10:58:33 +02:00

212 lines
6.9 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Generiert Migration 023 direkt aus der Fähigkeitsmatrix (ohne CSV-Zwischenschritt).
"""
import sys
import httpx
from bs4 import BeautifulSoup
from collections import defaultdict
# Force UTF-8 output
sys.stdout.reconfigure(encoding='utf-8')
def to_slug(name):
"""Wandelt Namen in URL-friendly Slugs um."""
return name.lower().replace(' ', '_').replace('ä', 'ae').replace('ö', 'oe').replace('ü', 'ue').replace('ß', 'ss')
# Wiki-Login und Matrix-Abruf
api_url = 'https://karatetrainer.net/api.php'
username = 'Jinkendo'
password = 'Jinkendo6970'
with httpx.Client(timeout=30) as client:
# Login
r1 = client.get(api_url, params={'action': 'query', 'meta': 'tokens', 'type': 'login', 'format': 'json'})
r1.raise_for_status()
token = r1.json().get('query', {}).get('tokens', {}).get('logintoken', '')
r2 = client.post(api_url, data={'action': 'login', 'lgname': username, 'lgpassword': password, 'lgtoken': token, 'format': 'json'})
r2.raise_for_status()
# Hole Matrix
r3 = client.get('https://karatetrainer.net/index.php?title=Fähigkeitsmatrix')
r3.raise_for_status()
html = r3.text
# Parse HTML
soup = BeautifulSoup(html, 'html.parser')
table = soup.find('table', {'class': 'wikitable'})
if not table:
print("ERROR: Tabelle nicht gefunden", file=sys.stderr)
sys.exit(1)
rows = table.find_all('tr')
# Extrahiere Skills
current_main_cat = 'karate'
current_sub_cat = None
skills_data = []
for idx, row in enumerate(rows):
cells = row.find_all(['td', 'th'])
if len(cells) == 1:
text = cells[0].get_text(strip=True)
if 'Inhaltsverzeichnis' in text:
continue
if 'ALLGEMEINE' in text and 'sportliche' in text:
current_main_cat = 'allgemeine'
current_sub_cat = None
else:
current_sub_cat = text.replace('­', '').strip()
elif len(cells) == 6:
skill_name = cells[0].get_text(strip=True).replace('­', '')
if skill_name and current_main_cat and current_sub_cat:
focus = 'karate' if current_main_cat == 'karate' else 'universal'
skills_data.append({
'skill': skill_name,
'sub_cat': current_sub_cat,
'main_cat': current_main_cat,
'focus': focus
})
# Duplikat-Handling
duplicates_to_remove = {
('Anaerobe Ausdauer', 'Kumite'),
('Bewegungsschnelligkeit', 'Kumite'),
('Flexibilität', 'Kumite'),
('Reaktionsschnelligkeit', 'Kumite'),
('Schnelligkeitsausdauer', 'Kumite'),
('Antizipation', 'Koordination'),
('Timing', 'Koordination'),
}
filtered_skills = []
for s in skills_data:
if (s['skill'], s['sub_cat']) not in duplicates_to_remove:
filtered_skills.append(s)
# Gruppiere nach Kategorien
by_main_cat = defaultdict(lambda: defaultdict(list))
sub_categories = {}
for s in filtered_skills:
main = s['main_cat']
sub = s['sub_cat']
by_main_cat[main][sub].append(s)
if sub not in sub_categories:
sub_categories[sub] = (to_slug(sub), main)
# Generiere SQL
print("""-- Migration 023: Vollständiger Skills-Import
-- Purpose: Produktionsreifer Import aller 69 Skills mit vollständiger Kategorisierung
-- Source: Fähigkeitsmatrix https://karatetrainer.net/index.php?title=Fähigkeitsmatrix
-- Date: 2026-04-27
-- ======================================================================
-- CLEANUP: Alte Daten löschen
-- ======================================================================
-- Erst M:N-Beziehungen löschen
DELETE FROM exercise_skills;
-- Skills löschen (Cascades zu skill_level_definitions)
DELETE FROM skills;
-- Kategorien löschen
DELETE FROM skill_categories;
DELETE FROM skill_main_categories WHERE id > 0; -- Falls Tabelle existiert
-- ======================================================================
-- 1. HAUPT-KATEGORIEN
-- ======================================================================
INSERT INTO skill_main_categories (name, slug, description, sort_order) VALUES
('KARATE Fähigkeiten', 'karate', 'Karate-spezifische Techniken und Fähigkeiten', 1),
('ALLGEMEINE sportliche Fähigkeiten', 'allgemeine', 'Universelle sportliche und mentale Fähigkeiten', 2);
-- ======================================================================
-- 2. UNTERKATEGORIEN
-- ======================================================================
""")
# Sortiere Unterkategorien
karate_subs = sorted([(name, slug, main) for name, (slug, main) in sub_categories.items() if main == 'karate'])
allgemeine_subs = sorted([(name, slug, main) for name, (slug, main) in sub_categories.items() if main == 'allgemeine'])
sort_order = 1
print("INSERT INTO skill_categories (name, slug, main_category_id, description, sort_order) VALUES")
for idx, (name, slug, main_cat) in enumerate(karate_subs + allgemeine_subs):
comma = "," if idx < len(karate_subs) + len(allgemeine_subs) - 1 else ";"
print(f"('{name}', '{slug}', (SELECT id FROM skill_main_categories WHERE slug='{main_cat}'), '', {sort_order}){comma}")
sort_order += 1
print("""
-- ======================================================================
-- 3. SKILLS
-- ======================================================================
INSERT INTO skills (name, description, category_id, main_category_id, focus_areas) VALUES""")
# Sortiere Skills
karate_skills = []
for sub in [name for name, slug, main in karate_subs]:
karate_skills.extend(by_main_cat['karate'][sub])
allgemeine_skills = []
for sub in [name for name, slug, main in allgemeine_subs]:
allgemeine_skills.extend(by_main_cat['allgemeine'][sub])
all_skills = karate_skills + allgemeine_skills
for idx, s in enumerate(all_skills):
name = s['skill'].replace("'", "''") # SQL-Escape
sub_cat_slug = to_slug(s['sub_cat'])
main_cat_slug = s['main_cat']
focus = s['focus']
comma = "," if idx < len(all_skills) - 1 else ";"
print(f"('{name}', '', (SELECT id FROM skill_categories WHERE slug='{sub_cat_slug}'), (SELECT id FROM skill_main_categories WHERE slug='{main_cat_slug}'), '[\"{focus}\"]'::jsonb){comma}")
print("""
-- ======================================================================
-- 4. VERIFIKATION
-- ======================================================================
-- Sollte 69 Skills ergeben
DO $$
DECLARE
skill_count INT;
BEGIN
SELECT COUNT(*) INTO skill_count FROM skills;
IF skill_count != 69 THEN
RAISE WARNING 'FEHLER: % Skills gefunden, erwartet 69', skill_count;
ELSE
RAISE NOTICE 'OK: 69 Skills importiert';
END IF;
END $$;
-- Zeige Verteilung
SELECT
mc.name AS hauptkategorie,
sc.name AS unterkategorie,
COUNT(s.id) AS anzahl_skills
FROM skills s
JOIN skill_categories sc ON s.category_id = sc.id
JOIN skill_main_categories mc ON s.main_category_id = mc.id
GROUP BY mc.name, sc.name, mc.sort_order, sc.sort_order
ORDER BY mc.sort_order, sc.sort_order;
""")
print(f"-- Total: {len(all_skills)} Skills", file=sys.stderr)