feat: add recursive subcategory search for MediaWiki import
Some checks failed
Deploy Development / deploy (push) Successful in 34s
Test Suite / lint-backend (push) Successful in 0s
Test Suite / build-frontend (push) Successful in 5s
Test Suite / playwright-tests (push) Failing after 2m2s

Issue: Only 2 exercises found instead of 200+
Root cause: Exercises likely organized in subcategories

Solution:
- Added recursive parameter to get_category_members()
- New _get_subcategories() helper method
- Recursively traverses all subcategories
- Logs subcategory counts for debugging

Behavior:
- get_category_members('Übungen') now finds:
  1. All pages directly in 'Kategorie:Übungen'
  2. All subcategories (e.g. 'Kihon', 'Kata', 'Kumite')
  3. All pages in those subcategories (recursive)

Example structure:
Kategorie:Übungen
├─ Seite: Übung A (2 direkte)
├─ Kategorie:Kihon
│  ├─ Seite: Mae-Geri
│  └─ Seite: Gyaku-Zuki
└─ Kategorie:Kata
   └─ Seite: Heian Shodan
This commit is contained in:
Lars 2026-04-24 17:53:11 +02:00
parent 9d041aaf4f
commit 8b51864b53

View File

@ -83,9 +83,9 @@ class SmwClient:
# Kategorien #
# ------------------------------------------------------------------ #
async def get_category_members(self, category: str, limit: int = 500) -> list[dict]:
async def get_category_members(self, category: str, limit: int = 500, recursive: bool = True) -> list[dict]:
"""
Gibt alle Seiten einer Kategorie zurück.
Gibt alle Seiten einer Kategorie zurück (optional rekursiv durch Unterkategorien).
Gibt Liste von {"pageid": int, "title": str} zurück.
"""
members = []
@ -97,7 +97,7 @@ class SmwClient:
"list": "categorymembers",
"cmtitle": f"Kategorie:{category}",
"cmlimit": min(limit, 500),
"cmtype": "page",
"cmtype": "page", # Nur Seiten, keine Unterkategorien
"cmprop": "ids|title",
}
if cmcontinue:
@ -111,8 +111,47 @@ class SmwClient:
else:
break
# Rekursiv durch Unterkategorien gehen
if recursive:
subcats = await self._get_subcategories(category)
logger.info(f"Kategorie '{category}': {len(members)} direkte Seiten, {len(subcats)} Unterkategorien")
for subcat in subcats:
if len(members) >= limit:
break
subcat_name = subcat["title"].replace("Kategorie:", "")
subcat_members = await self.get_category_members(subcat_name, limit=limit - len(members), recursive=True)
members.extend(subcat_members)
return members[:limit]
async def _get_subcategories(self, category: str) -> list[dict]:
    """Return all direct subcategories of a category.

    Issues ``list=categorymembers`` queries restricted to ``cmtype=subcat``
    through ``self._get`` and follows the ``cmcontinue`` token until the
    response no longer provides one.

    Args:
        category: Category name without the ``Kategorie:`` namespace
            prefix; the prefix is added when building ``cmtitle``.

    Returns:
        The ``categorymembers`` entries of every response page,
        concatenated in response order. Entries are requested with
        ``cmprop=ids|title``.

    Raises:
        KeyError: If a response lacks ``query`` / ``categorymembers``.
    """
    subcats: list[dict] = []
    cmcontinue = None
    while True:
        params = {
            "action": "query",
            "list": "categorymembers",
            "cmtitle": f"Kategorie:{category}",
            "cmlimit": 500,
            "cmtype": "subcat",  # subcategories only, no pages
            "cmprop": "ids|title",
        }
        if cmcontinue:
            params["cmcontinue"] = cmcontinue
        data = await self._get(params)
        subcats.extend(data["query"]["categorymembers"])
        # Stop as soon as there is no usable continuation token. Looping on
        # a "continue" block without "cmcontinue" would re-send the first
        # request forever and keep appending duplicate entries.
        cmcontinue = (data.get("continue") or {}).get("cmcontinue")
        if not cmcontinue:
            break
    return subcats
# ------------------------------------------------------------------ #
# Seiteninhalte #
# ------------------------------------------------------------------ #