From 3a4160fd1cfce964657c6e91276bc6cb1d3dee12 Mon Sep 17 00:00:00 2001 From: Lars Date: Mon, 27 Apr 2026 08:45:27 +0200 Subject: [PATCH] fix: detect and cleanup orphaned import references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: 221 exercises were skipped, but only 2 are actually in the database. Root cause: Failed imports created wiki_import_references entries, but the exercises were never saved (due to earlier bugs). The duplicate check only verified that references existed, not the actual data. Result: All subsequent imports skipped 'already imported' items. Solution: - Enhanced duplicate check to verify BOTH reference AND actual entity - If reference exists but entity is missing → delete orphaned reference - Item will be re-imported on same run (no manual cleanup needed) Check logic: 1. Find reference in wiki_import_references 2. Check if the exercises/skills/training_methods table has a row whose id matches the reference's local_id 3. If both exist → skip (true duplicate) 4. If reference exists but entity is missing → delete reference + re-import This auto-heals broken state from previous failed imports. 
--- backend/routers/import_wiki.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/backend/routers/import_wiki.py b/backend/routers/import_wiki.py index b648ed0..9d4475b 100644 --- a/backend/routers/import_wiki.py +++ b/backend/routers/import_wiki.py @@ -284,18 +284,37 @@ async def _run_import( page_title = member["title"] page_id = member.get("pageid") - # Duplikat-Check + # Duplikat-Check: Prüfe ob bereits importiert UND ob tatsächlich existiert if not reimport: with get_db() as conn: cur = get_cursor(conn) cur.execute( - "SELECT id FROM wiki_import_references WHERE wiki_page_title = %s AND content_type = %s", + "SELECT local_id FROM wiki_import_references WHERE wiki_page_title = %s AND content_type = %s", (page_title, import_type) ) - if cur.fetchone(): - stats["skipped"] += 1 - _update_log(log_id, **stats) - continue + ref = cur.fetchone() + if ref: + # Prüfe ob die Übung tatsächlich existiert + if import_type == "exercise": + cur.execute("SELECT id FROM exercises WHERE id = %s", (ref['local_id'],)) + elif import_type == "skill": + cur.execute("SELECT id FROM skills WHERE id = %s", (ref['local_id'],)) + else: + cur.execute("SELECT id FROM training_methods WHERE id = %s", (ref['local_id'],)) + + if cur.fetchone(): + # Referenz UND Übung existieren → Skip + stats["skipped"] += 1 + _update_log(log_id, **stats) + continue + else: + # Referenz existiert, aber Übung fehlt → Lösche fehlerhafte Referenz + logger.warning("Orphaned reference found for '%s', re-importing", page_title) + cur.execute( + "DELETE FROM wiki_import_references WHERE wiki_page_title = %s AND content_type = %s", + (page_title, import_type) + ) + conn.commit() # SMW Properties abrufen (mit Retry) try: