llm-api/wiki_router.py aktualisiert
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s

This commit is contained in:
Lars 2025-08-14 08:11:15 +02:00
parent 6a4e97f4e4
commit 1d50e7042e

View File

@ -24,6 +24,7 @@ from textwrap import dedent
import os, time, logging
import requests
from dotenv import load_dotenv
from starlette.responses import PlainTextResponse
load_dotenv()
@ -163,6 +164,94 @@ def _fetch_pageinfo_batch(titles: List[str]) -> Dict[str, Dict[str, Any]]:
_sleep()
return out
# -------- Doku-Konstanten (Markdown/.env) --------
# Markdown manual for wiki_importer.py; returned verbatim by the
# /manual/wiki_importer endpoint. The text is user-facing and stays in German.
# Sub-items are indented by two spaces so they render as nested lists, and
# every decision/mapping line uses "→" (as the "Mapping" heading does) so the
# relation between left and right side is unambiguous.
MANUAL_WIKI_IMPORTER = dedent("""
# wiki_importer.py Kurzanleitung
## Voraussetzungen
- API erreichbar: `GET /import/wiki/health` (Status `ok`)
- .env:
  - `API_BASE_URL=http://localhost:8000`
  - `WIKI_BOT_USER`, `WIKI_BOT_PASSWORD`
  - optional: `EXERCISE_COLLECTION=exercises`
## Smoke-Test (3 Läufe)
```bash
python3 wiki_importer.py --title "Affenklatschen" --category "Übungen" --smoke-test
```
## Vollimport
```bash
python3 wiki_importer.py --all
# optional:
python3 wiki_importer.py --all --category "Übungen"
python3 wiki_importer.py --all --dry-run
```
## Idempotenz-Logik
- external_id = `mw:{pageid}`
- Fingerprint (sha256) über: `title, summary, execution, notes, duration_minutes, capabilities, keywords`
- Entscheid:
  - not found → create
  - fingerprint gleich → skip
  - fingerprint ungleich → update (+ `imported_at`)
## Mapping (Wiki → Exercise)
- Schlüsselworte → `keywords` (`,`-getrennt, getrimmt, dedupliziert)
- Hilfsmittel → `equipment`
- Disziplin → `discipline`
- Durchführung/Notizen/Vorbereitung/Methodik → `execution`, `notes`, `preparation`, `method`
- Capabilities → `capabilities` (Level 1..5) + Facetten (`capability_ge1..5`, `capability_eq1..5`, `capability_keys`)
- Metadaten → `external_id`, `source="mediawiki"`, `imported_at`
## Troubleshooting
- 404 bei `/import/wiki/info?...`: prüfe Prefix (kein Doppelprefix), Titelvarianten
- 401 Login: echte User-Creds verwenden
- 502 Upstream: `WIKI_API_URL`/TLS prüfen; Timeouts/Retry/Throttle (`WIKI_TIMEOUT`, `WIKI_RETRIES`, `WIKI_SLEEP_MS`)
""")
# Reference table of the wiki-related environment variables exposed by the
# /meta/env endpoint. Only names and (German, user-facing) descriptions are
# listed here; no values are read from the environment.
ENV_DOC = [
    {"name": var_name, "desc": var_desc}
    for var_name, var_desc in (
        ("WIKI_API_URL", "Basis-URL zur MediaWiki-API (z. B. http://…/w/api.php)"),
        ("WIKI_TIMEOUT", "Timeout in Sekunden (Default 15)"),
        ("WIKI_RETRIES", "Anzahl zusätzlicher Versuche (Default 1)"),
        ("WIKI_SLEEP_MS", "Throttle zwischen Requests in Millisekunden (Default 0)"),
    )
]
# -------- Endpoints --------
# Shell snippets attached to the operation's OpenAPI entry under the
# "x-codeSamples" extension key.
_MANUAL_CODE_SAMPLES = [
    {
        "lang": "bash",
        "label": "Vollimport (Standard)",
        "source": "python3 wiki_importer.py --all",
    },
    {
        "lang": "bash",
        "label": "Dry-Run + Kategorie",
        "source": 'python3 wiki_importer.py --all --category "Übungen" --dry-run',
    },
]


@router.get(
    "/manual/wiki_importer",
    summary="Handbuch: wiki_importer.py (Markdown)",
    description="Kompaktes Handbuch mit .env-Hinweisen, Aufrufen, Idempotenz und Troubleshooting.",
    response_class=PlainTextResponse,
    response_description="Markdown-Text.",
    openapi_extra={"x-codeSamples": _MANUAL_CODE_SAMPLES},
)
def manual_wiki_importer():
    """Return the wiki_importer.py manual.

    The handler takes no parameters and returns the module-level
    ``MANUAL_WIKI_IMPORTER`` Markdown string unchanged; the route is
    registered with ``PlainTextResponse`` as its response class.
    """
    return MANUAL_WIKI_IMPORTER
@router.get(
    "/meta/env",
    summary=".env Referenz (Wiki-bezogen)",
    description=(
        "Listet die relevanten Umgebungsvariablen für die Wiki-Integration auf (ohne Werte)."
    ),
    response_description="Array aus {name, desc}.",
)
def meta_env() -> List[Dict[str, str]]:
    """Return the reference list of wiki-related environment variables.

    Returns:
        The module-level ``ENV_DOC`` list as-is: one dict with the keys
        ``name`` and ``desc`` per variable. The handler does not read any
        value from the environment.
    """
    return ENV_DOC
# -------- Endpoints --------
@router.get(
"/health",