diff --git a/llm-api/wiki_router.py b/llm-api/wiki_router.py
index 7602e70..0f21b37 100644
--- a/llm-api/wiki_router.py
+++ b/llm-api/wiki_router.py
@@ -24,6 +24,7 @@ from textwrap import dedent
 import os, time, logging
 import requests
 from dotenv import load_dotenv
+from starlette.responses import PlainTextResponse
 
 load_dotenv()
 
@@ -163,6 +164,98 @@ def _fetch_pageinfo_batch(titles: List[str]) -> Dict[str, Dict[str, Any]]:
         _sleep()
     return out
 
+# -------- Documentation constants (Markdown/.env) --------
+# Markdown manual returned verbatim by the `/manual/wiki_importer` route below.
+MANUAL_WIKI_IMPORTER = dedent("""
+# wiki_importer.py – Kurzanleitung
+
+## Voraussetzungen
+- API erreichbar: `GET /import/wiki/health` (Status `ok`)
+- .env:
+  - `API_BASE_URL=http://localhost:8000`
+  - `WIKI_BOT_USER`, `WIKI_BOT_PASSWORD`
+  - optional: `EXERCISE_COLLECTION=exercises`
+
+## Smoke-Test (3 Läufe)
+```bash
+python3 wiki_importer.py --title "Affenklatschen" --category "Übungen" --smoke-test
+```
+
+## Vollimport
+```bash
+python3 wiki_importer.py --all
+# optional:
+python3 wiki_importer.py --all --category "Übungen"
+python3 wiki_importer.py --all --dry-run
+```
+
+## Idempotenz-Logik
+- external_id = `mw:{pageid}`
+- Fingerprint (sha256) über: `title, summary, execution, notes, duration_minutes, capabilities, keywords`
+- Entscheid:
+  - not found → create
+  - fingerprint gleich → skip
+  - fingerprint ungleich → update (+ `imported_at`)
+
+## Mapping (Wiki → Exercise)
+- Schlüsselworte → `keywords` (`,`-getrennt, getrimmt, dedupliziert)
+- Hilfsmittel → `equipment`
+- Disziplin → `discipline`
+- Durchführung/Notizen/Vorbereitung/Methodik → `execution`, `notes`, `preparation`, `method`
+- Capabilities → `capabilities` (Level 1..5) + Facetten (`capability_ge1..5`, `capability_eq1..5`, `capability_keys`)
+- Metadaten → `external_id`, `source="mediawiki"`, `imported_at`
+
+## Troubleshooting
+- 404 bei `/import/wiki/info?...`: prüfe Prefix (kein Doppelprefix), Titelvarianten
+- 401 Login: echte User-Creds verwenden
+- 502 Upstream: `WIKI_API_URL`/TLS prüfen; Timeouts/Retry/Throttle (`WIKI_TIMEOUT`, `WIKI_RETRIES`, `WIKI_SLEEP_MS`)
+""")
+
+# Name/description pairs returned by the `/meta/env` route; values are never included.
+ENV_DOC = [
+    {"name": "WIKI_API_URL", "desc": "Basis-URL zur MediaWiki-API (z. B. http://…/w/api.php)"},
+    {"name": "WIKI_TIMEOUT", "desc": "Timeout in Sekunden (Default 15)"},
+    {"name": "WIKI_RETRIES", "desc": "Anzahl zusätzlicher Versuche (Default 1)"},
+    {"name": "WIKI_SLEEP_MS", "desc": "Throttle zwischen Requests in Millisekunden (Default 0)"},
+]
+
+# -------- Documentation endpoints --------
+@router.get(
+    "/manual/wiki_importer",
+    summary="Handbuch: wiki_importer.py (Markdown)",
+    description="Kompaktes Handbuch mit .env-Hinweisen, Aufrufen, Idempotenz und Troubleshooting.",
+    response_class=PlainTextResponse,
+    response_description="Markdown-Text.",
+    openapi_extra={
+        "x-codeSamples": [
+            {
+                "lang": "bash",
+                "label": "Vollimport (Standard)",
+                "source": "python3 wiki_importer.py --all"
+            },
+            {
+                "lang": "bash",
+                "label": "Dry-Run + Kategorie",
+                "source": "python3 wiki_importer.py --all --category \"Übungen\" --dry-run"
+            }
+        ]
+    }
+)
+def manual_wiki_importer():
+    """Return the wiki_importer.py manual (MANUAL_WIKI_IMPORTER) as plain Markdown text."""
+    return MANUAL_WIKI_IMPORTER
+
+
+@router.get(
+    "/meta/env",
+    summary=".env Referenz (Wiki-bezogen)",
+    description="Listet die relevanten Umgebungsvariablen für die Wiki-Integration auf (ohne Werte).",
+    response_description="Array aus {name, desc}.",
+)
+def meta_env() -> List[Dict[str, str]]:
+    """Return ENV_DOC: one {name, desc} entry per Wiki-related environment variable."""
+    return ENV_DOC
+
 # -------- Endpoints --------
 @router.get(
     "/health",