Trainer_LLM/llm-api/wiki_router1.2.0.py

174 lines
6.0 KiB
Python

"""
File: wiki_router.py
Beschreibung:
- Endpunkte für MediaWiki-Integration im lokalen Netzwerk.
- Funktionen:
* /health: Prüft Verfügbarkeit der MediaWiki-API.
* /login: Führt clientlogin durch und speichert Session-Cookies.
* /semantic/pages: Listet alle Übungen inkl. Unterkategorien via SMW-Ask.
* /parsepage: Ruft Roh-Wikitext über action=parse für eine Seite ab.
* /info: Liefert pageid und fullurl über Core-API Query.
* /semantic/page: Liefert Metadaten einer Übung und Wikitext sowie pageid über Core-API.
Version: 1.2.0
"""
from dotenv import load_dotenv
load_dotenv()
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from typing import Dict, Any, List
import requests, os
# Module version; matches the "Version" line in the module docstring.
__version__ = "1.2.0"
# Router on which the endpoints below register themselves via decorators.
router = APIRouter()
# MediaWiki API endpoint: taken from the WIKI_API_URL environment variable,
# falling back to the default URL given here.
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
# Single module-level HTTP session used by every endpoint, so that cookies
# received during /login are sent with the later wiki requests.
wiki_session = requests.Session()
class WikiLoginRequest(BaseModel):
    """Request body of the /login endpoint: the wiki account credentials."""

    username: str
    password: str
class WikiLoginResponse(BaseModel):
    """Response body of the /login endpoint."""

    # "success" or "failed", as set by login().
    status: str
    # Failure description; None when the login succeeded.
    message: str | None = None
class PageContentResponse(BaseModel):
    """Response body of the /parsepage endpoint."""

    pageid: int
    # Title as supplied by the caller; empty string when none was given.
    title: str
    # Wikitext extracted from the action=parse response.
    wikitext: str
class PageInfoResponse(BaseModel):
    """Response body of the /info endpoint."""

    pageid: int
    title: str
    # Value of the "fullurl" field from the MediaWiki query response.
    fullurl: str
# Health check
@router.get("/health")
def health_check():
    """Probe the MediaWiki API with a lightweight siteinfo query.

    Returns:
        ``{"status": "ok"}`` when the wiki answers with a non-error HTTP
        status.

    Raises:
        HTTPException: 502 when the request fails for any reason or the
            wiki answers with an error status.
    """
    siteinfo_params = {
        "action": "query",
        "meta": "siteinfo",
        "siprop": "general",
        "format": "json",
    }
    try:
        probe = wiki_session.get(WIKI_API_URL, params=siteinfo_params, timeout=5)
        probe.raise_for_status()
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {exc}")
    else:
        return {"status": "ok"}
# Login endpoint
@router.post("/login", response_model=WikiLoginResponse)
def login(data: WikiLoginRequest):
    """Log the shared wiki session in with the given credentials.

    A login token is fetched first. The credentials are then submitted via
    ``action=clientlogin``; if that does not report ``PASS`` (or the request
    itself fails), ``action=login`` is tried as a fallback with the same
    token.

    Args:
        data: Username and password of the wiki account.

    Returns:
        WikiLoginResponse with status ``"success"``, or ``"failed"`` plus a
        message when neither login method accepted the credentials.

    Raises:
        HTTPException: 502 when no login token can be obtained or the
            fallback login request fails.
    """
    # Fetch the login token.
    try:
        token_resp = wiki_session.get(
            WIKI_API_URL,
            params={"action": "query", "meta": "tokens", "type": "login", "format": "json"},
            timeout=10,
        )
        token_resp.raise_for_status()
        token = token_resp.json().get("query", {}).get("tokens", {}).get("logintoken")
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Token-Error: {e}") from e
    if not token:
        raise HTTPException(status_code=502, detail="Kein Login-Token erhalten")

    # Preferred path: clientlogin.
    try:
        login_resp = wiki_session.post(
            WIKI_API_URL,
            data={
                "action": "clientlogin",
                "format": "json",
                "username": data.username,
                "password": data.password,
                "logintoken": token,
                "loginreturnurl": "http://localhost:8000",
            },
            timeout=10,
        )
        login_resp.raise_for_status()
        status = login_resp.json().get("clientlogin", {}).get("status")
    except Exception:
        # Deliberate best effort: any failure here routes to the fallback.
        status = None

    # Fallback: action=login.
    if status != "PASS":
        try:
            alt = wiki_session.post(
                WIKI_API_URL,
                data={
                    "action": "login",
                    "format": "json",
                    "lgname": data.username,
                    "lgpassword": data.password,
                    # action=login needs the login token as well; without it
                    # the wiki does not process the credentials.
                    # NOTE(review): assumes the token is still valid after a
                    # rejected clientlogin attempt - confirm on the target wiki.
                    "lgtoken": token,
                },
                timeout=10,
            )
            alt.raise_for_status()
            status = alt.json().get("login", {}).get("result")
        except Exception as e:
            # Report upstream failures as 502, like the other endpoints do.
            raise HTTPException(status_code=502, detail=f"Login-Error: {e}") from e

    if status in ("PASS", "Success"):
        return WikiLoginResponse(status="success", message=None)
    return WikiLoginResponse(status="failed", message="Login fehlgeschlagen")
# SMW ask: all exercises incl. subcategories
@router.get("/semantic/pages")
def semantic_pages(category: str = Query(..., description="Kategorie ohne 'Category:'")) -> Dict[str, Any]:
    """Return the Semantic MediaWiki ask results for one category.

    Args:
        category: Category name without the ``Category:`` prefix.

    Returns:
        The ``query.results`` object of the ask response, or an empty dict
        when that value is absent or not a JSON object.

    Raises:
        HTTPException: 502 when the request fails, the wiki answers with an
            error status, or the body cannot be decoded.
    """
    ask_query = f"[[Category:{category}]]|limit=50000"
    try:
        # The request itself is inside the try so that timeouts and
        # connection errors are reported as 502 too.
        r = wiki_session.get(
            WIKI_API_URL,
            params={"action": "ask", "query": ask_query, "format": "json"},
            timeout=30,
        )
        r.raise_for_status()
        results = r.json().get("query", {}).get("results", {})
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"SMW-Ask-Error: {e}") from e
    # NOTE(review): SMW appears to send an empty JSON array instead of an
    # object when nothing matches - confirm; either way callers get a dict.
    return results if isinstance(results, dict) else {}
# Fetch wikitext via the parse endpoint (by pageid)
@router.get("/parsepage", response_model=PageContentResponse)
def parse_page(pageid: int = Query(...), title: str = Query(None)):
    """Fetch the wikitext of a page through ``action=parse``.

    Args:
        pageid: Numeric id of the page to fetch.
        title: Optional title, only echoed back in the response.

    Returns:
        PageContentResponse with the given pageid, the given title (empty
        string when none was passed) and the wikitext; the wikitext is an
        empty string when the response contains none.

    Raises:
        HTTPException: 502 when the request fails, the wiki answers with an
            error status, or the body cannot be decoded.
    """
    try:
        # The request itself is inside the try so that timeouts and
        # connection errors are reported as 502 too.
        r = wiki_session.get(
            WIKI_API_URL,
            params={"action": "parse", "pageid": pageid, "prop": "wikitext", "format": "json"},
            timeout=20,
        )
        r.raise_for_status()
        wikitext = r.json().get("parse", {}).get("wikitext", {}).get("*", "")
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Parse-Error: {e}") from e
    return PageContentResponse(pageid=pageid, title=title or "", wikitext=wikitext)
# Page info via the core API (determines pageid + fullurl)
@router.get("/info", response_model=PageInfoResponse)
def page_info(title: str = Query(..., description="Name der Seite")):
    """Look up the page id and full URL of a page by title.

    Args:
        title: Title of the wiki page.

    Returns:
        PageInfoResponse built from the first entry of ``query.pages``.

    Raises:
        HTTPException: 502 when the request fails, the wiki answers with an
            error status, or the body cannot be decoded; 404 when the
            response contains no page or marks it as missing or invalid.
    """
    try:
        # The request itself is inside the try so that timeouts and
        # connection errors are reported as 502 too.
        r = wiki_session.get(
            WIKI_API_URL,
            params={"action": "query", "titles": title, "prop": "info", "inprop": "url", "format": "json"},
            timeout=10,
        )
        r.raise_for_status()
        pages = r.json().get("query", {}).get("pages", {})
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Info-Error: {e}") from e

    # "pages" is keyed by page id; only the first entry is used. An empty or
    # malformed mapping previously escaped as an unhandled error.
    if not isinstance(pages, dict) or not pages:
        raise HTTPException(status_code=404, detail=f"Seite nicht gefunden: {title}")
    pid_str, page = next(iter(pages.items()))
    # Unknown or invalid titles come back flagged and under a negative id,
    # which is not a usable pageid for later calls.
    if "missing" in page or "invalid" in page:
        raise HTTPException(status_code=404, detail=f"Seite nicht gefunden: {title}")
    return PageInfoResponse(
        pageid=int(pid_str),
        title=page.get("title"),
        fullurl=page.get("fullurl"),
    )
# Detail endpoint for one exercise: metadata from ask + wikitext & id via core API
@router.get("/semantic/page", response_model=Dict[str, Any])
def semantic_page_detail(category: str = Query(...), title: str = Query(...)) -> Dict[str, Any]:
    """Combine ask metadata, page info and wikitext for a single page.

    The complete ask result for ``category`` is fetched through
    ``semantic_pages`` and the entry for ``title`` is picked from it; page
    id and URL come from ``page_info`` and the wikitext from ``parse_page``.

    Args:
        category: Category name, passed on to ``semantic_pages``.
        title: Title of the page; used as the key into the ask results.

    Returns:
        Dict with the keys ``title``, ``pageid``, ``fullurl``, ``printouts``
        and ``wikitext``.

    Raises:
        HTTPException: 404 when the ask results hold no entry for ``title``;
            any HTTPException raised by the three helper calls propagates.
    """
    # Metadata from the SMW ask query.
    entries = semantic_pages(category)
    # A non-dict result (e.g. an empty list) has no entry to look up; treat
    # it as "not found" instead of failing on .get().
    entry = entries.get(title) if isinstance(entries, dict) else None
    if not entry:
        raise HTTPException(status_code=404, detail="Übung nicht gefunden im SMW-Ask-Ergebnis.")
    # Page info via the core API.
    info = page_info(title=title)
    # Wikitext via parse.
    parsed = parse_page(pageid=info.pageid, title=title)
    return {
        "title": title,
        "pageid": info.pageid,
        "fullurl": info.fullurl,
        "printouts": entry.get("printouts", {}),
        "wikitext": parsed.wikitext,
    }