Trainer_LLM/llm-api/wiki_router1.1.9.py

173 lines
6.2 KiB
Python

"""
File: wiki_router.py
Description:
- Endpoints for the MediaWiki integration on the local network.
- Functions:
* /health: Checks that the MediaWiki API is reachable.
* /login: Performs clientlogin and stores the session cookies.
* /semantic/pages: Lists all exercises incl. subcategories via SMW-Ask.
* /pages: Lists the direct members of a category (categorymembers).
* /parsepage: Fetches the raw wikitext of a page via action=parse.
* /semantic/page: Returns the metadata of an exercise plus its wikitext via parse.
Version: 1.1.9
"""
from dotenv import load_dotenv
load_dotenv()
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from typing import Dict, Any, List
import requests, os
# Module version; matches the version stated in the module docstring.
__version__ = "1.1.9"
# Router on which every endpoint of this module is registered.
router = APIRouter()
# URL of the wiki's api.php; can be overridden with the WIKI_API_URL environment variable.
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
# Single module-level HTTP session used by all endpoints, so cookies obtained
# by /login are reused by the later requests made through it.
wiki_session = requests.Session()
class WikiLoginRequest(BaseModel):
    # Request body of POST /login; both values are forwarded to the wiki's login API.
    username: str
    password: str
class WikiLoginResponse(BaseModel):
    # Response body of POST /login.
    # status: "success" or "failed", as set by the login endpoint.
    status: str
    # message: optional explanation; the login endpoint leaves it None on success.
    message: str | None = None
class PageContentResponse(BaseModel):
    # Response body of GET /parsepage: page identity plus its raw wikitext.
    pageid: int
    title: str
    wikitext: str
# Health check
@router.get("/health")
def health_check():
    """Report whether the MediaWiki API answers a siteinfo query.

    Returns:
        ``{"status": "ok"}`` when the wiki replies with a non-error HTTP status.

    Raises:
        HTTPException: 502 when the request fails or the wiki replies with
            an HTTP error status.
    """
    siteinfo_query = {
        "action": "query",
        "meta": "siteinfo",
        "siprop": "general",
        "format": "json",
    }
    try:
        wiki_session.get(WIKI_API_URL, params=siteinfo_query, timeout=5).raise_for_status()
    except Exception as err:
        raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {err}")
    return {"status": "ok"}
# Login endpoint
def _fetch_login_token() -> str:
    """Fetch a login token for the shared wiki session.

    Returns:
        The ``logintoken`` reported by ``action=query&meta=tokens&type=login``.

    Raises:
        HTTPException: 502 when the token request fails or contains no token.
    """
    try:
        token_resp = wiki_session.get(
            WIKI_API_URL,
            params={"action": "query", "meta": "tokens", "type": "login", "format": "json"},
            timeout=10,
        )
        token_resp.raise_for_status()
        token = token_resp.json().get("query", {}).get("tokens", {}).get("logintoken")
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Token-Error: {e}") from e
    if not token:
        raise HTTPException(status_code=502, detail="Kein Login-Token erhalten")
    return token


@router.post("/login", response_model=WikiLoginResponse)
def login(data: WikiLoginRequest):
    """Log the shared wiki session in, trying ``clientlogin`` first and ``login`` second.

    Args:
        data: Username and password to authenticate with.

    Returns:
        ``WikiLoginResponse`` with status ``"success"`` when either login
        method succeeds, otherwise status ``"failed"`` with a message.

    Raises:
        HTTPException: 502 when a login token cannot be obtained or the
            fallback login request itself fails.
    """
    token = _fetch_login_token()

    # First attempt: clientlogin.
    try:
        login_resp = wiki_session.post(
            WIKI_API_URL,
            data={
                "action": "clientlogin",
                "format": "json",
                "username": data.username,
                "password": data.password,
                "logintoken": token,
                "loginreturnurl": "http://localhost:8000",
            },
            timeout=10,
        )
        login_resp.raise_for_status()
        status = login_resp.json().get("clientlogin", {}).get("status")
    except Exception:
        # Deliberate best effort: any clientlogin failure falls through to
        # the action=login fallback below instead of aborting the request.
        status = None

    # Fallback: action=login.
    if status != "PASS":
        # action=login requires a login token as well; without ``lgtoken`` the
        # wiki only ever answers "NeedToken". A fresh token is fetched because
        # the first one has already been submitted with the clientlogin attempt.
        fallback_token = _fetch_login_token()
        try:
            alt = wiki_session.post(
                WIKI_API_URL,
                data={
                    "action": "login",
                    "format": "json",
                    "lgname": data.username,
                    "lgpassword": data.password,
                    "lgtoken": fallback_token,
                },
                timeout=10,
            )
            alt.raise_for_status()
            status = alt.json().get("login", {}).get("result")
        except Exception as e:
            # Report upstream failures as 502, like the other endpoints do.
            raise HTTPException(status_code=502, detail=f"Login-Error: {e}") from e

    if status in ("PASS", "Success"):
        return WikiLoginResponse(status="success", message=None)
    return WikiLoginResponse(status="failed", message="Login fehlgeschlagen")
# SMW ask: all exercises including subcategories
@router.get("/semantic/pages")
def semantic_pages(category: str = Query(..., description="Kategorie ohne 'Category:'")) -> Dict[str, Any]:
    """Run an SMW ``ask`` query for a category and return its results.

    Args:
        category: Category name without the ``Category:`` prefix.

    Returns:
        The ``query.results`` mapping of the ask response; an empty dict when
        the response carries no result mapping.

    Raises:
        HTTPException: 502 when the request fails, the wiki replies with an
            HTTP error status, or the body is not valid JSON.
    """
    # NOTE(review): ``category`` is interpolated into the ask query unescaped;
    # a caller can append further query conditions or printouts through it.
    ask_query = f"[[Category:{category}]]|limit=50000"
    try:
        # The request itself sits inside the try so that timeouts and
        # connection errors become a 502 instead of an unhandled 500.
        r = wiki_session.get(
            WIKI_API_URL,
            params={"action": "ask", "query": ask_query, "format": "json"},
            timeout=30,
        )
        r.raise_for_status()
        payload = r.json()
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"SMW-Ask-Error: {e}") from e

    # An empty result set may be serialised as a JSON list rather than an
    # object; normalise to a dict so the declared return type always holds.
    query = payload.get("query") if isinstance(payload, dict) else None
    results = query.get("results") if isinstance(query, dict) else None
    return results if isinstance(results, dict) else {}
# List of direct category members (for the pageid fallback)
@router.get("/pages")
def list_category_members(category: str = Query(..., description="Kategorie ohne 'Category:'")) -> List[Dict[str, Any]]:
    """Return all main-namespace pages that are direct members of a category.

    Args:
        category: Category name without the ``Category:`` prefix.

    Returns:
        The ``categorymembers`` entries of all result batches, in API order.

    Raises:
        HTTPException: 502 when a request fails, the wiki replies with an
            HTTP error status, or the body is not valid JSON.
    """
    params: Dict[str, Any] = {
        "action": "query",
        "list": "categorymembers",
        "cmtitle": f"Category:{category}",
        "cmnamespace": 0,
        # "max" asks for the largest batch the wiki allows for this session;
        # a fixed 50000 exceeds that limit and the result was silently cut off.
        "cmlimit": "max",
        "format": "json",
    }
    members: List[Dict[str, Any]] = []
    while True:
        try:
            r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
            r.raise_for_status()
            payload = r.json()
        except Exception as e:
            raise HTTPException(status_code=502, detail=f"Kategorie-Error: {e}") from e

        query = payload.get("query") if isinstance(payload, dict) else None
        batch = query.get("categorymembers") if isinstance(query, dict) else None
        if isinstance(batch, list):
            members.extend(batch)

        # Follow the API's continuation parameters until the listing is complete.
        continuation = payload.get("continue") if isinstance(payload, dict) else None
        if not isinstance(continuation, dict) or not continuation:
            break
        params = {**params, **continuation}
    return members
# Fetch wikitext through the parse endpoint
@router.get("/parsepage", response_model=PageContentResponse)
def parse_page(pageid: int = Query(...), title: str | None = Query(None)):
    """Fetch the raw wikitext of a page by its page id via ``action=parse``.

    Args:
        pageid: Id of the page to load.
        title: Optional title to echo back in the response; when omitted, the
            title reported by the parse response is used if present.

    Returns:
        ``PageContentResponse`` with the page id, title and wikitext. The
        wikitext is an empty string when the response contains none.

    Raises:
        HTTPException: 502 when the request fails, the wiki replies with an
            HTTP error status, or the body cannot be read as expected.
    """
    try:
        # Request and decoding are inside the try so that timeouts, connection
        # errors and malformed bodies become a 502 instead of an unhandled 500.
        r = wiki_session.get(
            WIKI_API_URL,
            params={"action": "parse", "pageid": pageid, "prop": "wikitext", "format": "json"},
            timeout=20,
        )
        r.raise_for_status()
        parsed = r.json().get("parse", {})
        wikitext = parsed.get("wikitext", {}).get("*", "")
        api_title = parsed.get("title", "")
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Parse-Error: {e}") from e
    return PageContentResponse(pageid=pageid, title=title or api_title or "", wikitext=wikitext)
# Detail endpoint for one exercise: metadata from ask + wikitext via parse (by title)
@router.get("/semantic/page")
def semantic_page_detail(category: str = Query(...), title: str = Query(...)) -> Dict[str, Any]:
    """
    Return the metadata and wikitext of a single exercise.

    Uses the SMW ask result of the category for the metadata and the parse
    endpoint (addressed by title) for the wikitext.

    Args:
        category: Category name passed on to ``semantic_pages``.
        title: Page title; used as the key into the ask result and as the
            ``page`` parameter of the parse request.

    Returns:
        Dict with ``title``, ``pageid``, ``fullurl``, ``printouts`` and ``wikitext``.

    Raises:
        HTTPException: 404 when the title is not part of the ask result;
            502 when the parse request fails.
    """
    # Metadata from SMW ask
    entries = semantic_pages(category)
    # An empty ask result may arrive as a list instead of a mapping; treat
    # that as "not found" rather than failing on ``.get``.
    entry = entries.get(title) if isinstance(entries, dict) else None
    if not entry:
        raise HTTPException(status_code=404, detail="Übung nicht gefunden im SMW-Ask-Ergebnis.")

    # Load the wikitext directly through the parse endpoint, by title
    try:
        r = wiki_session.get(
            WIKI_API_URL,
            params={"action": "parse", "page": title, "prop": "wikitext", "format": "json"},
            timeout=20,
        )
        r.raise_for_status()
        wikitext = r.json().get("parse", {}).get("wikitext", {}).get("*", "")
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Parse-Error: {e}") from e

    return {
        "title": title,
        "pageid": entry.get("pageid"),
        "fullurl": entry.get("fullurl"),
        "printouts": entry.get("printouts", {}),
        "wikitext": wikitext,
    }