# Source: Trainer_LLM/llm-api/wiki_router0.1.2.py (repository listing: 122 lines, 5.3 KiB, Python)

from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from typing import List
import requests, os
# Module version (bumped for this revision).
# NOTE(review): the file name carries "0.1.2" while this constant reads
# "1.1.6" — confirm which one is authoritative.
__version__ = "1.1.6"

# Router that the endpoint decorators in this module register on.
router = APIRouter()

# MediaWiki configuration; each value can be overridden via the environment.
WIKI_API_URL = os.environ.get("WIKI_API_URL", "https://karatetrainer.net/api.php")
WIKI_BOT_USER = os.environ.get("WIKI_BOT_USER", "")
WIKI_BOT_PASSWORD = os.environ.get("WIKI_BOT_PASSWORD", "")

# One HTTP session shared by every request this module sends to the wiki API.
wiki_session = requests.Session()
# Models
class WikiLoginRequest(BaseModel):
    # Request body of the login endpoint: the credentials to be checked.

    # Account name submitted by the caller.
    username: str

    # Password submitted by the caller.
    password: str
class WikiLoginResponse(BaseModel):
    # Result of a login attempt.

    # Outcome marker; the login handler in this module emits "success" or "failed".
    status: str

    # Optional explanation; stays None when there is nothing to report.
    message: str | None = None
class CategoryMembersResponse(BaseModel):
    # One entry of a category listing.

    # Page id of the member page, as reported by the wiki API.
    pageid: int

    # Title of the member page, as reported by the wiki API.
    title: str
class PageContentResponse(BaseModel):
    # A single wiki page together with its text.

    # Page id the content was requested for.
    pageid: int

    # Page title (either supplied by the caller or taken from the API answer).
    title: str

    # Text of the page's main revision slot; empty string when none was returned.
    content: str
# Health check: sends a general siteinfo query to the MediaWiki API.
# Returns {"status": "ok"} when the wiki answers with a success status and
# raises HTTP 502 when the request fails for any reason.
@router.get("/health")
def health_check():
    siteinfo_query = dict(action="query", meta="siteinfo", siprop="general", format="json")
    try:
        # raise_for_status() turns 4xx/5xx answers into exceptions as well,
        # so they end up in the same 502 path as connection problems.
        wiki_session.get(WIKI_API_URL, params=siteinfo_query, timeout=5).raise_for_status()
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {e}")
    return {"status": "ok"}
# Login endpoint
@router.post("/login", response_model=WikiLoginResponse)
def login(data: WikiLoginRequest):
    """Check the submitted credentials against the configured bot account.

    This is a direct comparison intended for testing; no request is sent to
    the wiki itself.

    Args:
        data: Username and password supplied by the caller.

    Returns:
        A WikiLoginResponse with status "success" when both values match
        WIKI_BOT_USER and WIKI_BOT_PASSWORD, otherwise status "failed" with
        an explanatory message. When no bot account is configured the
        result is always "failed".
    """
    # Function-scope import so this handler does not depend on the file header.
    import hmac

    failure = WikiLoginResponse(status="failed", message="Incorrect username or password.")

    # Both settings default to "" when unset; without this guard an empty
    # username/password pair would be accepted as a valid login.
    if not WIKI_BOT_USER or not WIKI_BOT_PASSWORD:
        return failure

    # compare_digest avoids leaking, through timing, where the inputs differ.
    # Encode first: it only accepts str arguments that are pure ASCII.
    user_ok = hmac.compare_digest(data.username.encode("utf-8"), WIKI_BOT_USER.encode("utf-8"))
    password_ok = hmac.compare_digest(data.password.encode("utf-8"), WIKI_BOT_PASSWORD.encode("utf-8"))

    # Both comparisons are evaluated before combining them so the response
    # time does not reveal which of the two values was wrong.
    if user_ok and password_ok:
        return WikiLoginResponse(status="success", message=None)
    return failure
# 1) Fetch the members of a category
@router.get("/pages", response_model=List[CategoryMembersResponse])
def list_category_members(category: str = Query(..., description="Kategorie-Name ohne 'Category:'")):
    """Return all pages that belong to a wiki category.

    Queries the MediaWiki ``categorymembers`` list with a batch size of 500
    and follows the API's ``continue`` tokens, so categories with more
    members than fit into one batch are returned completely instead of being
    cut off after the first response.

    Args:
        category: Category name without the ``Category:`` prefix.

    Returns:
        One CategoryMembersResponse (page id and title) per member.

    Raises:
        HTTPException: 502 when a request to the wiki fails or its answer
            cannot be processed.
    """
    params = {
        "action": "query",
        "list": "categorymembers",
        "cmtitle": f"Category:{category}",
        "cmlimit": 500,
        "format": "json",
    }
    members = []
    try:
        while True:
            r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
            r.raise_for_status()
            payload = r.json()
            members.extend(payload.get("query", {}).get("categorymembers", []))

            # The API signals further batches with a "continue" object whose
            # entries have to be sent back verbatim with the next request.
            continuation = payload.get("continue")
            if not continuation:
                break
            params.update(continuation)
    except Exception as e:
        # Boundary to the upstream wiki: any failure is reported as 502,
        # keeping the original error attached as the cause.
        raise HTTPException(status_code=502, detail=f"Kategorie-Error: {e}") from e
    return [CategoryMembersResponse(pageid=m["pageid"], title=m["title"]) for m in members]
# 2) Fetch the content of a page
@router.post("/pagecontent", response_model=PageContentResponse)
def get_page_content(pageid: int = Query(...), title: str = Query(None)):
    """Return the text of the main revision slot of a wiki page.

    Args:
        pageid: Id of the page to fetch.
        title: Optional title to report in the response; when omitted (or
            empty) the title delivered by the wiki is used.

    Returns:
        A PageContentResponse with the page id, the resolved title and the
        page text (empty string when the API delivered no content).

    Raises:
        HTTPException: 502 when the request to the wiki fails or its answer
            cannot be processed; 404 when no title was supplied and the wiki
            reports none for this page id.
    """
    params = {
        "action": "query",
        "prop": "revisions",
        "rvprop": "content",
        "rvslots": "main",
        "pageids": pageid,
        "format": "json",
    }
    try:
        r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
        r.raise_for_status()
        pages = r.json().get("query", {}).get("pages", {})
        # The "pages" object is keyed by the page id rendered as a string.
        page = pages.get(str(pageid), {})
        content = page.get("revisions", [{}])[0].get("slots", {}).get("main", {}).get("*", "")
    except Exception as e:
        # Boundary to the upstream wiki: any failure is reported as 502,
        # keeping the original error attached as the cause.
        raise HTTPException(status_code=502, detail=f"Content-Error: {e}") from e

    resolved_title = title or page.get("title")
    if resolved_title is None:
        # Without a title the response model cannot be built; previously this
        # surfaced as an internal validation error instead of a clear 404.
        raise HTTPException(status_code=404, detail=f"Page not found: {pageid}")
    return PageContentResponse(pageid=pageid, title=resolved_title, content=content)
# 3) Import exercises from a category
def _infobox_text(infobox, name, default=''):
    """Return the stripped text of infobox parameter *name*, or *default* if it is absent."""
    if not infobox.has(name):
        return default
    return str(infobox.get(name).value).strip()


def _infobox_int(infobox, name, default):
    """Return infobox parameter *name* as int, or *default* if absent or not a valid integer."""
    raw = _infobox_text(infobox, name, None)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        # A malformed number on one page must not abort the whole import.
        return default


def _infobox_keywords(infobox):
    """Return the comma-separated ``keywords`` parameter as a list without empty entries."""
    raw = _infobox_text(infobox, 'keywords', '')
    return [kw.strip() for kw in raw.split(',') if kw.strip()]


@router.post("/import/exercises")
def import_exercises(category: str = Query(..., description="Kategorie ohne 'Category:'")):
    """Import the exercises described on the pages of a wiki category.

    Fetches every page of the category, parses its wikitext, reads the
    ``ÜbungInfoBox`` template and posts the extracted fields to the
    ``/exercise`` endpoint of the application (base URL from ``APP_URL``,
    default ``http://localhost:8000``). Pages without that template are
    skipped. Numeric fields that are missing or not valid integers fall back
    to their defaults (``min_participants`` 1, ``duration_minutes`` 0).

    Args:
        category: Category name without the ``Category:`` prefix.

    Returns:
        ``{"imported": [...]}`` with the ``id`` taken from each answer of the
        exercise endpoint that had status code 200.

    Raises:
        HTTPException: 502 when the wiki or the exercise endpoint cannot be
            reached.
    """
    import mwparserfromhell

    # Loop-invariant: resolve the target URL once instead of per page.
    exercise_url = f"{os.getenv('APP_URL','http://localhost:8000')}/exercise"

    imported = []
    # Step 1: list of pages in the category
    for p in list_category_members(category):
        # Step 2: fetch and parse the page text
        pc = get_page_content(pageid=p.pageid, title=p.title)
        templates = mwparserfromhell.parse(pc.content).filter_templates()
        infobox = next((t for t in templates if t.name.strip() == 'ÜbungInfoBox'), None)
        if infobox is None:
            continue

        # Step 3: extract the fields
        ex = {
            'title': _infobox_text(infobox, 'title', p.title),
            'summary': _infobox_text(infobox, 'summary'),
            'short_description': _infobox_text(infobox, 'short_description'),
            'keywords': _infobox_keywords(infobox),
            'link': None,
            'discipline': _infobox_text(infobox, 'discipline'),
            'group': _infobox_text(infobox, 'group', None),
            'age_group': _infobox_text(infobox, 'age_group'),
            'target_group': _infobox_text(infobox, 'target_group'),
            'min_participants': _infobox_int(infobox, 'min_participants', 1),
            'duration_minutes': _infobox_int(infobox, 'duration', 0),
            'capabilities': {},
            'category': category,
            'purpose': '',
            'execution': '',
            'notes': '',
            'preparation': '',
            'method': '',
            'equipment': [],
        }

        # Step 4: POST to the exercise endpoint. The timeout keeps a stalled
        # endpoint from blocking this request forever.
        try:
            resp = requests.post(exercise_url, json=ex, timeout=30)
        except requests.RequestException as e:
            raise HTTPException(status_code=502, detail=f"Exercise-Error: {e}") from e
        if resp.status_code == 200:
            imported.append(resp.json().get('id'))
    return {"imported": imported}