llm-api/wiki_router.py aktualisiert
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s
This commit is contained in:
parent
6a4e97f4e4
commit
1d50e7042e
|
|
@ -24,6 +24,7 @@ from textwrap import dedent
|
||||||
import os, time, logging
|
import os, time, logging
|
||||||
import requests
|
import requests
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
from starlette.responses import PlainTextResponse
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
|
@ -163,6 +164,94 @@ def _fetch_pageinfo_batch(titles: List[str]) -> Dict[str, Dict[str, Any]]:
|
||||||
_sleep()
|
_sleep()
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
# -------- Doku-Konstanten (Markdown/.env) --------
|
||||||
|
# Markdown manual for wiki_importer.py, returned verbatim by the
# /manual/wiki_importer endpoint. dedent() strips any common leading
# indentation; the text is kept at column 0 so the nested bullet lists
# (two-space indent) survive unchanged.
MANUAL_WIKI_IMPORTER = dedent("""
# wiki_importer.py – Kurzanleitung

## Voraussetzungen
- API erreichbar: `GET /import/wiki/health` (Status `ok`)
- .env:
  - `API_BASE_URL=http://localhost:8000`
  - `WIKI_BOT_USER`, `WIKI_BOT_PASSWORD`
  - optional: `EXERCISE_COLLECTION=exercises`

## Smoke-Test (3 Läufe)
```bash
python3 wiki_importer.py --title "Affenklatschen" --category "Übungen" --smoke-test
```

## Vollimport
```bash
python3 wiki_importer.py --all
# optional:
python3 wiki_importer.py --all --category "Übungen"
python3 wiki_importer.py --all --dry-run
```

## Idempotenz-Logik
- external_id = `mw:{pageid}`
- Fingerprint (sha256) über: `title, summary, execution, notes, duration_minutes, capabilities, keywords`
- Entscheid:
  - not found → create
  - fingerprint gleich → skip
  - fingerprint ungleich → update (+ `imported_at`)

## Mapping (Wiki → Exercise)
- Schlüsselworte → `keywords` (`,`-getrennt, getrimmt, dedupliziert)
- Hilfsmittel → `equipment`
- Disziplin → `discipline`
- Durchführung/Notizen/Vorbereitung/Methodik → `execution`, `notes`, `preparation`, `method`
- Capabilities → `capabilities` (Level 1..5) + Facetten (`capability_ge1..5`, `capability_eq1..5`, `capability_keys`)
- Metadaten → `external_id`, `source="mediawiki"`, `imported_at`

## Troubleshooting
- 404 bei `/import/wiki/info?...`: prüfe Prefix (kein Doppelprefix), Titelvarianten
- 401 Login: echte User-Creds verwenden
- 502 Upstream: `WIKI_API_URL`/TLS prüfen; Timeouts/Retry/Throttle (`WIKI_TIMEOUT`, `WIKI_RETRIES`, `WIKI_SLEEP_MS`)
""")
|
||||||
|
|
||||||
|
# Reference list of the wiki-related environment variables served by the
# /meta/env endpoint. Only names and descriptions are listed, never values.
ENV_DOC: List[Dict[str, str]] = [
    {"name": "WIKI_API_URL", "desc": "Basis-URL zur MediaWiki-API (z. B. http://…/w/api.php)"},
    {"name": "WIKI_TIMEOUT", "desc": "Timeout in Sekunden (Default 15)"},
    {"name": "WIKI_RETRIES", "desc": "Anzahl zusätzlicher Versuche (Default 1)"},
    {"name": "WIKI_SLEEP_MS", "desc": "Throttle zwischen Requests in Millisekunden (Default 0)"},
]
|
||||||
|
|
||||||
|
# -------- Endpoints --------
|
||||||
|
@router.get(
    "/manual/wiki_importer",
    summary="Handbuch: wiki_importer.py (Markdown)",
    description="Kompaktes Handbuch mit .env-Hinweisen, Aufrufen, Idempotenz und Troubleshooting.",
    response_class=PlainTextResponse,
    response_description="Markdown-Text.",
    openapi_extra={
        # Vendor extension carrying sample invocations for the API docs.
        "x-codeSamples": [
            {
                "lang": "bash",
                "label": "Vollimport (Standard)",
                "source": "python3 wiki_importer.py --all",
            },
            {
                "lang": "bash",
                "label": "Dry-Run + Kategorie",
                "source": "python3 wiki_importer.py --all --category \"Übungen\" --dry-run",
            },
        ]
    },
)
def manual_wiki_importer() -> str:
    """Return the wiki_importer.py manual as Markdown.

    The text is the module-level ``MANUAL_WIKI_IMPORTER`` constant; the
    route declares ``PlainTextResponse`` so it is sent as plain text
    rather than JSON-encoded.

    Returns:
        The Markdown source of the manual.
    """
    return MANUAL_WIKI_IMPORTER
|
||||||
|
|
||||||
|
|
||||||
|
@router.get(
    "/meta/env",
    summary=".env Referenz (Wiki-bezogen)",
    description="Listet die relevanten Umgebungsvariablen für die Wiki-Integration auf (ohne Werte).",
    response_description="Array aus {name, desc}.",
)
def meta_env() -> List[Dict[str, str]]:
    """Return the reference list of wiki-related environment variables.

    Returns:
        The module-level ``ENV_DOC`` list; each entry is a dict with the
        keys ``name`` and ``desc``. No variable values are included.
    """
    return ENV_DOC
|
||||||
|
|
||||||
# -------- Endpoints --------
|
# -------- Endpoints --------
|
||||||
@router.get(
|
@router.get(
|
||||||
"/health",
|
"/health",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user