scripts/wiki_importer.py aktualisiert
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 1s
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 1s
This commit is contained in:
parent
6bab3cdf04
commit
cf085f8ef0
|
|
@ -1,21 +1,20 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""
|
"""
|
||||||
Module: wiki_importer.py
|
wiki_importer.py – v2.3.5
|
||||||
Beschreibung:
|
|
||||||
- Importiert Übungen aus dem MediaWiki via FastAPI wiki_router
|
|
||||||
- Login gegen /import/wiki/login (abschaltbar via --skip-login)
|
|
||||||
- Titel-Liste via /semantic/pages, Parsing via /parsepage, Info via /info (nur wenn nötig)
|
|
||||||
- Idempotentes Upsert: external_id="mw:{pageid}", Fingerprint (sha256) über Kernfelder
|
|
||||||
- Lookup via /exercise/by-external-id, dann create/update/skip inkl. Zählern
|
|
||||||
- Smoke-Test (--smoke-test): 3 Läufe (create → skip → update)
|
|
||||||
|
|
||||||
v2.3.4 – Änderungen ggü. 2.3.3:
|
Fix: Einige Felder (discipline, execution, keywords, equipment) wurden in einzelnen Seiten
|
||||||
- **Robuste Template-Erkennung**: Namen werden unicode-normalisiert & diakritik-insensitiv verglichen
|
nicht mehr gefüllt. Ursache: Template-/Key-Varianten (z. B. "Übung Infobox" mit Leerzeichen,
|
||||||
(z. B. "ÜbungInfoBox" == "UebungInfoBox" == "uebunginfobox").
|
"Geräte/Material", "Schlüsselwörter", "Hilfsmittel", "Ablauf" usw.) wurden vom Matcher
|
||||||
- **Feld-Synonyme & Key-Normalisierung**: "summary/execution/duration/keywords/..." werden über
|
nicht immer erkannt.
|
||||||
mehrere mögliche Parameternamen aufgelöst (z. B. Durchführung/Durchfuehrung/Ablauf).
|
|
||||||
- Ziel: Verhindert leere Felder beim 2. Lauf und damit fälschliche Updates.
|
Änderungen ggü. v2.3.4:
|
||||||
|
- Separater Normalizer für Template-Namen (entfernt Nicht‑Alphanumerika), dadurch matchen auch
|
||||||
|
Varianten wie "Übung Infobox", "Uebung-Infobox" etc.
|
||||||
|
- Erweitertes Synonym-Set für Felder: execution/keywords/equipment/discipline u. a.
|
||||||
|
- Fuzzy‑Fallback: Wenn _get_first() nichts findet, suche Keys, die die Token enthalten
|
||||||
|
(z. B. "gerate/material" ⇒ equipment).
|
||||||
|
- Keine API-/CLI-Änderungen.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
@ -32,10 +31,9 @@ import time
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
# ----- Konfiguration / Defaults -----
|
# ----- Konfiguration / Defaults -----
|
||||||
load_dotenv() # .env laden, falls vorhanden
|
load_dotenv()
|
||||||
|
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000/import/wiki")
|
||||||
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000/import/wiki") # FastAPI-Wiki-Proxy
|
EXERCISE_API = os.getenv("EXERCISE_API_URL", "http://localhost:8000/exercise")
|
||||||
EXERCISE_API = os.getenv("EXERCISE_API_URL", "http://localhost:8000/exercise") # Exercise-Endpoint
|
|
||||||
DEFAULT_CAT = os.getenv("WIKI_CATEGORY", "Übungen")
|
DEFAULT_CAT = os.getenv("WIKI_CATEGORY", "Übungen")
|
||||||
DEFAULT_TITLE = os.getenv("WIKI_EXERCISE_TITLE", "Affenklatschen")
|
DEFAULT_TITLE = os.getenv("WIKI_EXERCISE_TITLE", "Affenklatschen")
|
||||||
REQUEST_TIMEOUT = int(os.getenv("REQUEST_TIMEOUT", "60"))
|
REQUEST_TIMEOUT = int(os.getenv("REQUEST_TIMEOUT", "60"))
|
||||||
|
|
@ -49,23 +47,30 @@ def _strip_diacritics(s: str) -> str:
|
||||||
return "".join(ch for ch in unicodedata.normalize("NFD", s) if not unicodedata.combining(ch))
|
return "".join(ch for ch in unicodedata.normalize("NFD", s) if not unicodedata.combining(ch))
|
||||||
|
|
||||||
def _norm_key(s: str) -> str:
    """Normalize a template parameter name for comparison.

    The name is passed through ``_norm_unicode`` (presumably a Unicode
    normalization form -- confirm against its definition), stripped of
    diacritics, trimmed and case-folded. Punctuation is deliberately kept
    so that combined keys such as "gerate/material" stay distinguishable.

    A falsy input (``None`` or ``""``) is treated as the empty string.
    """
    without_marks = _strip_diacritics(_norm_unicode(s or ""))
    return without_marks.strip().casefold()
|
||||||
|
|
||||||
# Template-Aliasse (normalisierte Namen)
|
def _norm_tpl(s: str) -> str:
    """Normalize a template name for comparison.

    Applies ``_norm_key`` and then additionally drops every character that
    is not alphanumeric, so that spelling variants which differ only in
    spaces or punctuation (e.g. "Übung Infobox", "Uebung-Infobox") collapse
    to the same string.
    """
    return "".join(filter(str.isalnum, _norm_key(s)))
|
||||||
|
|
||||||
|
# Template-Aliasse (normalisierte Namen, _norm_tpl!)
|
||||||
|
TPL_UEBUNG_INFOBOX = {"ubunginfobox", "uebunginfobox", "ubunginfo", "uebunginfo"}
|
||||||
|
TPL_UEBUNGSBESCHREIBUNG = {"ubungsbeschreibung", "uebungsbeschreibung", "beschreibungubung", "beschreibunguebung"}
|
||||||
TPL_SKILLDEV = {"skilldevelopment"}
|
TPL_SKILLDEV = {"skilldevelopment"}
|
||||||
|
|
||||||
# Synonyme für Parameter (normalisierte Keys)
|
# Synonyme für Parameter (normalisierte Keys via _norm_key)
|
||||||
KEYS_SUMMARY = ["summary", "kurzbeschreibung", "beschreibung", "kurztext"]
|
KEYS_SUMMARY = ["summary", "kurzbeschreibung", "beschreibung", "kurztext"]
|
||||||
KEYS_EXECUTION = ["durchführung", "durchfuehrung", "ausführung", "ausfuehrung", "execution", "ablauf"]
|
KEYS_EXECUTION = ["durchführung", "durchfuehrung", "ausführung", "ausfuehrung", "execution", "ablauf", "vorgehen"]
|
||||||
KEYS_DURATION = ["dauer", "zeit", "dauer_minuten", "dauer (min)", "minuten"]
|
KEYS_DURATION = ["dauer", "zeit", "dauer_minuten", "dauer (min)", "minuten"]
|
||||||
KEYS_KEYWORDS = ["schlüsselworte", "schluesselworte", "keywords", "tags"]
|
KEYS_KEYWORDS = ["schlüsselworte", "schluesselworte", "schlüsselwörter", "schluesselwoerter", "keywords", "stichworte", "schlagworte", "tags"]
|
||||||
KEYS_EQUIPMENT = ["equipment", "geräte", "geraete", "material"]
|
KEYS_EQUIPMENT = ["equipment", "geräte", "geraete", "gerät", "geraet", "material", "hilfsmittel", "gerate/material"]
|
||||||
KEYS_DISCIPLINE = ["übungstyp", "uebungstyp", "discipline"]
|
KEYS_DISCIPLINE = ["übungstyp", "uebungstyp", "discipline", "disziplin", "schwerpunkt", "bereich", "thema", "technik"]
|
||||||
KEYS_GROUP = ["gruppengröße", "gruppengroesse", "group"]
|
KEYS_GROUP = ["gruppengröße", "gruppengroesse", "group"]
|
||||||
KEYS_AGE_GROUP = ["altersgruppe"]
|
KEYS_AGE_GROUP = ["altersgruppe"]
|
||||||
KEYS_TARGET_GROUP = ["zielgruppe", "target_group"]
|
KEYS_TARGET_GROUP = ["zielgruppe", "target_group"]
|
||||||
|
|
@ -74,14 +79,13 @@ KEYS_PREPARATION = ["refmethode", "vorbereitung", "preparation"]
|
||||||
KEYS_METHOD = ["method", "methode"]
|
KEYS_METHOD = ["method", "methode"]
|
||||||
KEYS_NOTES = ["hinweise", "notes"]
|
KEYS_NOTES = ["hinweise", "notes"]
|
||||||
|
|
||||||
# ---- Hilfsfunktionen ----
|
# ---- Wiki-Router Helpers ----
|
||||||
|
|
||||||
def wiki_health() -> None:
    """Check that the wiki proxy answers on ``{API_BASE_URL}/health``.

    Raises:
        requests.HTTPError: if the endpoint returns an error status
            (propagated from ``raise_for_status``).
    """
    health_url = f"{API_BASE_URL}/health"
    response = requests.get(health_url, timeout=15)
    response.raise_for_status()
    print("[Sanity] Wiki health OK")
|
||||||
|
|
||||||
|
|
||||||
def wiki_login(username: str, password: str) -> None:
|
def wiki_login(username: str, password: str) -> None:
|
||||||
payload = {"username": username, "password": password}
|
payload = {"username": username, "password": password}
|
||||||
r = requests.post(f"{API_BASE_URL}/login", json=payload, timeout=30)
|
r = requests.post(f"{API_BASE_URL}/login", json=payload, timeout=30)
|
||||||
|
|
@ -96,19 +100,18 @@ def wiki_login(username: str, password: str) -> None:
|
||||||
raise RuntimeError(f"[Login] {msg}")
|
raise RuntimeError(f"[Login] {msg}")
|
||||||
print("[Login] success")
|
print("[Login] success")
|
||||||
|
|
||||||
|
|
||||||
def fetch_all_pages(category: str) -> Dict[str, Any]:
    """Fetch the page listing for *category* from ``/semantic/pages``.

    Args:
        category: Value sent as the ``category`` query parameter.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the endpoint returns an error status.
    """
    pages_url = f"{API_BASE_URL}/semantic/pages"
    query = {"category": category}
    response = requests.get(pages_url, params=query, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()
|
||||||
|
|
||||||
|
|
||||||
def fetch_page_info(title: str) -> Dict[str, Any]:
    """Fetch ``pageid`` and ``fullurl`` for a page from ``/info``.

    Args:
        title: Value sent as the ``title`` query parameter.

    Returns:
        A dict with exactly the keys ``"pageid"`` and ``"fullurl"``; a key
        missing from the JSON response is reported as ``None``.

    Raises:
        requests.HTTPError: if the endpoint returns an error status.
    """
    response = requests.get(
        f"{API_BASE_URL}/info",
        params={"title": title},
        timeout=30,
    )
    response.raise_for_status()
    body = response.json()
    return {field: body.get(field) for field in ("pageid", "fullurl")}
|
||||||
|
|
||||||
|
# ---- Parser ----
|
||||||
|
|
||||||
def parse_exercise(title: str, pageid: int) -> Dict[str, Any]:
|
def parse_exercise(title: str, pageid: int) -> Dict[str, Any]:
|
||||||
print(f"[Parse] Lade '{title}' (ID={pageid})")
|
print(f"[Parse] Lade '{title}' (ID={pageid})")
|
||||||
|
|
@ -123,19 +126,14 @@ def parse_exercise(title: str, pageid: int) -> Dict[str, Any]:
|
||||||
|
|
||||||
raw: Dict[str, Any] = {"title": title, "source": "MediaWiki", "pageid": pageid}
|
raw: Dict[str, Any] = {"title": title, "source": "MediaWiki", "pageid": pageid}
|
||||||
|
|
||||||
# Templates sammeln (robust gegen Varianten)
|
|
||||||
for tpl in wikicode.filter_templates():
|
for tpl in wikicode.filter_templates():
|
||||||
name_raw = str(tpl.name)
|
name_norm = _norm_tpl(str(tpl.name))
|
||||||
name_norm = _norm_key(name_raw)
|
if name_norm in TPL_UEBUNG_INFOBOX or name_norm in TPL_UEBUNGSBESCHREIBUNG:
|
||||||
if name_norm in TPL_UEBUNG_INFOBOX:
|
|
||||||
for p in tpl.params:
|
|
||||||
raw[str(p.name).strip()] = str(p.value).strip()
|
|
||||||
elif name_norm in TPL_UEBUNGSBESCHREIBUNG:
|
|
||||||
for p in tpl.params:
|
for p in tpl.params:
|
||||||
|
# Original-Parameternamen übernehmen; Normalisierung passiert später
|
||||||
raw[str(p.name).strip()] = str(p.value).strip()
|
raw[str(p.name).strip()] = str(p.value).strip()
|
||||||
elif name_norm in TPL_SKILLDEV:
|
elif name_norm in TPL_SKILLDEV:
|
||||||
raw.setdefault("capabilities", [])
|
raw.setdefault("capabilities", [])
|
||||||
# Standard-Keys (engl. Template)
|
|
||||||
def _getp(t, k):
|
def _getp(t, k):
|
||||||
try:
|
try:
|
||||||
return str(t.get(k).value).strip()
|
return str(t.get(k).value).strip()
|
||||||
|
|
@ -149,10 +147,11 @@ def parse_exercise(title: str, pageid: int) -> Dict[str, Any]:
|
||||||
lvl_i = 0
|
lvl_i = 0
|
||||||
if cap:
|
if cap:
|
||||||
raw["capabilities"].append({"capability": cap, "level": lvl_i})
|
raw["capabilities"].append({"capability": cap, "level": lvl_i})
|
||||||
|
|
||||||
raw["wikitext"] = wikitext
|
raw["wikitext"] = wikitext
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
# ---- Fingerprint-Unterstützung (stabil) ----
|
# ---- Fingerprint (stabil) ----
|
||||||
|
|
||||||
def _normalize(v: Any) -> str:
|
def _normalize(v: Any) -> str:
|
||||||
if v is None:
|
if v is None:
|
||||||
|
|
@ -163,21 +162,18 @@ def _normalize(v: Any) -> str:
|
||||||
return json.dumps(v, sort_keys=True, ensure_ascii=False)
|
return json.dumps(v, sort_keys=True, ensure_ascii=False)
|
||||||
return str(v).strip()
|
return str(v).strip()
|
||||||
|
|
||||||
|
|
||||||
def _norm_text(s: str) -> str:
|
def _norm_text(s: str) -> str:
|
||||||
if s is None:
|
if s is None:
|
||||||
return ""
|
return ""
|
||||||
s = str(s).replace("\u00a0", " ") # NBSP → Space
|
s = str(s).replace("\u00a0", " ")
|
||||||
s = s.strip()
|
s = s.strip()
|
||||||
s = " ".join(s.split()) # Collapse whitespace
|
s = " ".join(s.split())
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
def _canon_title(t: str) -> str:
|
def _canon_title(t: str) -> str:
|
||||||
t = (t or "").strip().replace("_", " ")
|
t = (t or "").strip().replace("_", " ")
|
||||||
return t.replace("–", "-").replace("—", "-")
|
return t.replace("–", "-").replace("—", "-")
|
||||||
|
|
||||||
|
|
||||||
def compute_fingerprint(payload: Dict[str, Any]) -> str:
|
def compute_fingerprint(payload: Dict[str, Any]) -> str:
|
||||||
kws = payload.get("keywords") or []
|
kws = payload.get("keywords") or []
|
||||||
kws = sorted({(k or "").strip() for k in kws if (k or "").strip()}, key=str.casefold)
|
kws = sorted({(k or "").strip() for k in kws if (k or "").strip()}, key=str.casefold)
|
||||||
|
|
@ -198,11 +194,10 @@ def compute_fingerprint(payload: Dict[str, Any]) -> str:
|
||||||
base = "|".join(_normalize(f) for f in fields)
|
base = "|".join(_normalize(f) for f in fields)
|
||||||
return hashlib.sha256(base.encode("utf-8")).hexdigest()
|
return hashlib.sha256(base.encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
# ---- Feldauflösung (Synonyme) ----
|
# ---- Feldauflösung (Synonyme + Fuzzy) ----
|
||||||
|
|
||||||
def _norm_keymap(d: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of *d* whose string keys are normalized via ``_norm_key``.

    Non-string keys are dropped. If several original keys normalize to the
    same string, the value of the one iterated last wins.
    """
    normalized: Dict[str, Any] = {}
    for key, value in d.items():
        if isinstance(key, str):
            normalized[_norm_key(key)] = value
    return normalized
|
||||||
|
|
||||||
|
|
||||||
def _get_first(d: Dict[str, Any], candidates: List[str]) -> Any:
|
def _get_first(d: Dict[str, Any], candidates: List[str]) -> Any:
|
||||||
m = _norm_keymap(d)
|
m = _norm_keymap(d)
|
||||||
|
|
@ -212,9 +207,20 @@ def _get_first(d: Dict[str, Any], candidates: List[str]) -> Any:
|
||||||
return v
|
return v
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _get_first_fuzzy(d: Dict[str, Any], tokens: List[str]) -> Any:
    """Return the first non-empty value whose key contains all *tokens*.

    Keys of *d* are normalized with ``_norm_keymap``; each token is
    normalized with ``_norm_key`` so that callers may pass tokens with
    diacritics or mixed case. A key matches when every token occurs in it
    as a substring.

    Args:
        d: Mapping of raw parameter names to values.
        tokens: Substrings that must all be present in the normalized key.

    Returns:
        The value of the first matching key (in the mapping's iteration
        order) whose value is neither ``None`` nor ``""``, or ``None`` when
        nothing matches.
    """
    needles = [needle for needle in (_norm_key(t) for t in tokens) if needle]
    if not needles:
        # Without this guard all() is vacuously true and an arbitrary
        # unrelated value would be returned.
        return None
    for key, value in _norm_keymap(d).items():
        if value in (None, ""):
            continue
        if all(needle in key for needle in needles):
            return value
    return None
|
||||||
|
|
||||||
|
# ---- Payload ----
|
||||||
|
|
||||||
def build_payload(raw: Dict[str, Any], fullurl: str, category: str, *, mutate: bool = False) -> Dict[str, Any]:
|
def build_payload(raw: Dict[str, Any], fullurl: str, category: str, *, mutate: bool = False) -> Dict[str, Any]:
|
||||||
# Exercise.capabilities erwartet Dict[str,int]
|
# Capabilities -> Dict[str,int]
|
||||||
caps_list = raw.get("capabilities", [])
|
caps_list = raw.get("capabilities", [])
|
||||||
capabilities: Dict[str, int] = {}
|
capabilities: Dict[str, int] = {}
|
||||||
for c in caps_list:
|
for c in caps_list:
|
||||||
|
|
@ -226,10 +232,12 @@ def build_payload(raw: Dict[str, Any], fullurl: str, category: str, *, mutate: b
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Defaults/Fallbacks via Synonyme
|
|
||||||
# summary / execution
|
# summary / execution
|
||||||
summary = _get_first(raw, KEYS_SUMMARY) or ""
|
summary = _get_first(raw, KEYS_SUMMARY) or ""
|
||||||
execution = _get_first(raw, KEYS_EXECUTION) or ""
|
execution = _get_first(raw, KEYS_EXECUTION)
|
||||||
|
if execution in (None, ""):
|
||||||
|
# Fuzzy: Key enthält z. B. "ablauf" oder "durchfuehrung"
|
||||||
|
execution = _get_first_fuzzy(raw, ["ablauf"]) or _get_first_fuzzy(raw, ["durchf",]) or ""
|
||||||
|
|
||||||
# duration
|
# duration
|
||||||
duration = _get_first(raw, KEYS_DURATION)
|
duration = _get_first(raw, KEYS_DURATION)
|
||||||
|
|
@ -240,12 +248,16 @@ def build_payload(raw: Dict[str, Any], fullurl: str, category: str, *, mutate: b
|
||||||
|
|
||||||
# keywords
|
# keywords
|
||||||
kw_raw = _get_first(raw, KEYS_KEYWORDS)
|
kw_raw = _get_first(raw, KEYS_KEYWORDS)
|
||||||
|
if kw_raw in (None, ""):
|
||||||
|
kw_raw = _get_first_fuzzy(raw, ["stich", "worte"]) or _get_first_fuzzy(raw, ["schlag", "worte"]) or ""
|
||||||
keywords: List[str] = []
|
keywords: List[str] = []
|
||||||
if isinstance(kw_raw, str):
|
if isinstance(kw_raw, str):
|
||||||
keywords = [k.strip() for k in kw_raw.split(",") if k.strip()]
|
keywords = [k.strip() for k in kw_raw.split(",") if k.strip()]
|
||||||
|
|
||||||
# equipment
|
# equipment
|
||||||
eq_raw = _get_first(raw, KEYS_EQUIPMENT)
|
eq_raw = _get_first(raw, KEYS_EQUIPMENT)
|
||||||
|
if eq_raw in (None, ""):
|
||||||
|
eq_raw = _get_first_fuzzy(raw, ["gerate", "material"]) or _get_first_fuzzy(raw, ["hilfsmittel"]) or ""
|
||||||
equipment: List[str] = []
|
equipment: List[str] = []
|
||||||
if isinstance(eq_raw, str):
|
if isinstance(eq_raw, str):
|
||||||
equipment = [e.strip() for e in eq_raw.split(",") if e.strip()]
|
equipment = [e.strip() for e in eq_raw.split(",") if e.strip()]
|
||||||
|
|
@ -257,6 +269,9 @@ def build_payload(raw: Dict[str, Any], fullurl: str, category: str, *, mutate: b
|
||||||
notes = (str(notes) + " [auto-update]").strip()
|
notes = (str(notes) + " [auto-update]").strip()
|
||||||
|
|
||||||
discipline = _get_first(raw, KEYS_DISCIPLINE) or ""
|
discipline = _get_first(raw, KEYS_DISCIPLINE) or ""
|
||||||
|
if discipline in (None, ""):
|
||||||
|
discipline = _get_first_fuzzy(raw, ["ubung", "typ"]) or _get_first_fuzzy(raw, ["schwerpunkt"]) or ""
|
||||||
|
|
||||||
group = _get_first(raw, KEYS_GROUP) or None
|
group = _get_first(raw, KEYS_GROUP) or None
|
||||||
age_group = _get_first(raw, KEYS_AGE_GROUP) or ""
|
age_group = _get_first(raw, KEYS_AGE_GROUP) or ""
|
||||||
target_group = _get_first(raw, KEYS_TARGET_GROUP) or ""
|
target_group = _get_first(raw, KEYS_TARGET_GROUP) or ""
|
||||||
|
|
@ -291,7 +306,7 @@ def build_payload(raw: Dict[str, Any], fullurl: str, category: str, *, mutate: b
|
||||||
payload["fingerprint"] = compute_fingerprint(payload)
|
payload["fingerprint"] = compute_fingerprint(payload)
|
||||||
return payload
|
return payload
|
||||||
|
|
||||||
# ---- Lookup/Upsert ----
|
# ---- Lookup/Upsert (wie v2.3.4) ----
|
||||||
|
|
||||||
def lookup_by_external_id(external_id: str) -> Tuple[Optional[Dict[str, Any]], Optional[int]]:
|
def lookup_by_external_id(external_id: str) -> Tuple[Optional[Dict[str, Any]], Optional[int]]:
|
||||||
url = f"{EXERCISE_API}/by-external-id"
|
url = f"{EXERCISE_API}/by-external-id"
|
||||||
|
|
@ -374,7 +389,7 @@ def upsert_exercise(payload: Dict[str, Any], *, dry_run: bool = False) -> str:
|
||||||
action, reason = "create", "unexpected lookup type"
|
action, reason = "create", "unexpected lookup type"
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
print(f"[DryRun] {action.upper():6} '{title}' ({ext_id}) - {reason}")
|
print(f"[DryRun] {action.upper():6} '{title}' ({ext_id}) – {reason}")
|
||||||
if action == "update":
|
if action == "update":
|
||||||
_print_diff(found_payload, payload)
|
_print_diff(found_payload, payload)
|
||||||
return action
|
return action
|
||||||
|
|
@ -402,10 +417,10 @@ def upsert_exercise(payload: Dict[str, Any], *, dry_run: bool = False) -> str:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
print(f"[Update] '{title}' - {reason} -> OK")
|
print(f"[Update] '{title}' – {reason} -> OK")
|
||||||
_print_diff(found_payload, payload)
|
_print_diff(found_payload, payload)
|
||||||
else:
|
else:
|
||||||
print(f"[Skip] '{title}' - {reason}")
|
print(f"[Skip] '{title}' – {reason}")
|
||||||
return action
|
return action
|
||||||
|
|
||||||
# ----- Orchestrierung -----
|
# ----- Orchestrierung -----
|
||||||
|
|
@ -485,7 +500,6 @@ def run_smoke_test(title: str, category: str) -> None:
|
||||||
print("\n[SmokeTest] Zusammenfassung:")
|
print("\n[SmokeTest] Zusammenfassung:")
|
||||||
print(json.dumps({"run1": act1, "run2": act2, "run3": act3}, ensure_ascii=False, indent=2))
|
print(json.dumps({"run1": act1, "run2": act2, "run3": act3}, ensure_ascii=False, indent=2))
|
||||||
|
|
||||||
|
|
||||||
# ----- Main -----
|
# ----- Main -----
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
|
|
@ -500,10 +514,8 @@ def main() -> None:
|
||||||
parser.add_argument("--smoke-test", action="store_true", help="3 Durchläufe (create→skip→update) für --title")
|
parser.add_argument("--smoke-test", action="store_true", help="3 Durchläufe (create→skip→update) für --title")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# Sanity
|
|
||||||
wiki_health()
|
wiki_health()
|
||||||
|
|
||||||
# Login (sofern nicht explizit übersprungen)
|
|
||||||
if not args.skip_login:
|
if not args.skip_login:
|
||||||
if not args.username or not args.password:
|
if not args.username or not args.password:
|
||||||
print("[Login] Fehler: fehlende Credentials. Setze .env (WIKI_BOT_USER/WIKI_BOT_PASSWORD) oder CLI --username/--password.", file=sys.stderr)
|
print("[Login] Fehler: fehlende Credentials. Setze .env (WIKI_BOT_USER/WIKI_BOT_PASSWORD) oder CLI --username/--password.", file=sys.stderr)
|
||||||
|
|
@ -526,6 +538,5 @@ def main() -> None:
|
||||||
result = process_one(args.title, args.category, mutate=False, dry_run=args.dry_run)
|
result = process_one(args.title, args.category, mutate=False, dry_run=args.dry_run)
|
||||||
print(f"[Result] {result}")
|
print(f"[Result] {result}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user