Trainer_LLM/llm-api/wiki_router1.1.5.py

169 lines
7.7 KiB
Python

"""
File: wiki_router.py
Beschreibung:
- Enthält Endpunkte für MediaWiki-Integration im lokalen Netzwerk.
- Funktionen:
* /health: Prüft Verfügbarkeit der MediaWiki-API.
* /login: Führt clientlogin durch und speichert Session-Cookies.
* /pages: Listet Seiten einer Kategorie (Artikel im Namespace 0).
* /pagecontent: Ruft Wikitext einer Seite ab.
* /semantic/pages: Führt SMW-Ask-Abfrage aus.
* /import/exercises: Importiert Übungen per Infobox-Parsing.
Version: 1.1.6
"""
from dotenv import load_dotenv
load_dotenv()
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
from typing import List, Dict
import requests, os
# Version bumped; matches the version stated in the module docstring.
__version__ = "1.1.6"

# Router that all endpoints in this module are registered on.
router = APIRouter()

# MediaWiki configuration, taken from the environment with fallbacks.
WIKI_API_URL = os.environ.get("WIKI_API_URL", "https://karatetrainer.net/api.php")
WIKI_BOT_USER = os.environ.get("WIKI_BOT_USER", "")
WIKI_BOT_PASSWORD = os.environ.get("WIKI_BOT_PASSWORD", "")

# Single shared HTTP session used by every endpoint for calls to the wiki.
wiki_session = requests.Session()
# Models
class WikiLoginRequest(BaseModel):
    """Request body of the login endpoint: the wiki credentials to use."""

    # Account name sent to the wiki as the login user.
    username: str
    # Password sent to the wiki for that account.
    password: str
class WikiLoginResponse(BaseModel):
    """Result of a login attempt as returned by the login endpoint."""

    # Outcome of the attempt; the login endpoint sets "success" or "failed".
    status: str
    # Optional detail about a failed attempt; None on success.
    message: str | None = None
class CategoryMembersResponse(BaseModel):
    """One page listed as a member of a wiki category."""

    # Page id as reported by the wiki API.
    pageid: int
    # Page title as reported by the wiki API.
    title: str
class PageContentResponse(BaseModel):
    """A wiki page together with the text of its main content slot."""

    # Page id the content was requested for.
    pageid: int
    # Page title (supplied by the caller or taken from the API response).
    title: str
    # Text of the page's main slot; empty string if the API returned none.
    content: str
# Health-Check
@router.get("/health")
def health_check():
    """Check that the MediaWiki API answers a basic siteinfo query.

    Returns:
        ``{"status": "ok"}`` when the request succeeds with a non-error
        HTTP status.

    Raises:
        HTTPException: 502 if the request fails or the wiki answers with an
            HTTP error status.
    """
    siteinfo_query = {
        "action": "query",
        "meta": "siteinfo",
        "siprop": "general",
        "format": "json",
    }
    try:
        probe = wiki_session.get(WIKI_API_URL, params=siteinfo_query, timeout=5)
        probe.raise_for_status()
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {e}")
    else:
        return {"status": "ok"}
# Login Endpoint
@router.post("/login", response_model=WikiLoginResponse)
def login(data: WikiLoginRequest):
    """Log the shared wiki session in with the supplied credentials.

    The function first requests a login token, then tries ``clientlogin``.
    If ``clientlogin`` does not report ``PASS``, it falls back to the
    ``action=login`` module using the same token.

    Args:
        data: Username and password to log in with.

    Returns:
        WikiLoginResponse with ``status="success"`` on a successful login,
        otherwise ``status="failed"`` plus the reason reported by the wiki
        (or the text of the exception raised during the fallback).

    Raises:
        HTTPException: 502 if the token request or the ``clientlogin``
            request fails, or if the wiki returns no login token.
    """
    # Step 1: fetch a login token for this session.
    try:
        token_resp = wiki_session.get(
            WIKI_API_URL,
            params={"action": "query", "meta": "tokens", "type": "login", "format": "json"},
            timeout=10,
        )
        token_resp.raise_for_status()
        token = token_resp.json().get("query", {}).get("tokens", {}).get("logintoken")
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Token-Error: {e}") from e
    if not token:
        raise HTTPException(status_code=502, detail="Kein Login-Token erhalten")

    # Step 2: interactive-style login via clientlogin.
    try:
        login_resp = wiki_session.post(
            WIKI_API_URL,
            data={
                "action": "clientlogin",
                "format": "json",
                "username": data.username,
                "password": data.password,
                "logintoken": token,
                "loginreturnurl": "http://localhost:8000",
            },
            timeout=10,
        )
        login_resp.raise_for_status()
        cl = login_resp.json().get("clientlogin", {})
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Login-Error: {e}") from e
    if cl.get("status") == "PASS":
        return WikiLoginResponse(status="success", message=None)

    # Step 3: fallback to action=login. The login token must be sent as
    # "lgtoken"; without it the wiki only answers "NeedToken" and the
    # fallback could never succeed.
    try:
        alt = wiki_session.post(
            WIKI_API_URL,
            data={
                "action": "login",
                "format": "json",
                "lgname": data.username,
                "lgpassword": data.password,
                "lgtoken": token,
            },
            timeout=10,
        )
        alt.raise_for_status()
        res = alt.json().get("login", {})
        if res.get("result") == "Success":
            return WikiLoginResponse(status="success", message=None)
        # Prefer the fallback's reason; otherwise report why clientlogin failed.
        return WikiLoginResponse(
            status="failed",
            message=res.get("reason") or cl.get("message"),
        )
    except Exception as e:
        # Best effort: a failing fallback is reported as a failed login,
        # not as a gateway error.
        return WikiLoginResponse(status="failed", message=str(e))
# List category members (Namespace 0)
@router.get("/pages", response_model=List[CategoryMembersResponse])
def list_category_members(category: str = Query(..., description="Kategorie ohne 'Category:'")):
    """List all namespace-0 pages that are members of a wiki category.

    The wiki API returns category members in batches. This function asks for
    the largest batch the wiki allows (``cmlimit=max``) and keeps following
    the ``continue`` block of each response until the listing is complete,
    so large categories are not silently truncated.

    Args:
        category: Category name without the ``Category:`` prefix.

    Returns:
        A list of CategoryMembersResponse, one per member page.

    Raises:
        HTTPException: 502 if any request to the wiki fails or returns an
            HTTP error status.
    """
    params = {
        "action": "query",
        "list": "categorymembers",
        "cmtitle": f"Category:{category}",
        "cmnamespace": 0,
        "cmlimit": "max",
        "format": "json",
    }
    members = []
    try:
        while True:
            r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
            r.raise_for_status()
            payload = r.json()
            members.extend(payload.get("query", {}).get("categorymembers", []))
            continuation = payload.get("continue")
            if not continuation:
                break
            # Merge the continuation values into the next request as the
            # API's continuation protocol requires.
            params.update(continuation)
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Kategorie-Error: {e}") from e
    return [CategoryMembersResponse(pageid=m["pageid"], title=m["title"]) for m in members]
# Fetch page content
@router.post("/pagecontent", response_model=PageContentResponse)
def get_page_content(pageid: int = Query(...), title: str = Query(None)):
    """Fetch the main-slot text of a wiki page by page id.

    Args:
        pageid: Id of the page to fetch.
        title: Optional title to report in the response; when empty, the
            title from the wiki's answer is used instead.

    Returns:
        PageContentResponse with the page id, the resolved title and the
        content of the main slot (empty string if the wiki returned none).

    Raises:
        HTTPException: 502 if the request fails or the response cannot be
            read; 404 if no title was supplied and the wiki's answer does
            not contain one for this page id.
    """
    params = {
        "action": "query",
        "prop": "revisions",
        "rvprop": "content",
        "rvslots": "main",
        "pageids": pageid,
        "format": "json",
    }
    try:
        r = wiki_session.get(WIKI_API_URL, params=params, timeout=10)
        r.raise_for_status()
        pages = r.json().get("query", {}).get("pages", {})
        # The response keys pages by their id as a string.
        page = pages.get(str(pageid), {})
        content = page.get("revisions", [{}])[0].get("slots", {}).get("main", {}).get("*", "")
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Content-Error: {e}") from e

    # Previously a missing page entry or title surfaced as an unhandled
    # KeyError / validation error; report it as "not found" instead.
    resolved_title = title or page.get("title")
    if not resolved_title:
        raise HTTPException(status_code=404, detail=f"Seite {pageid} nicht gefunden")
    return PageContentResponse(pageid=pageid, title=resolved_title, content=content)
# SMW-Ask query (rekursive Abfrage über Unterkategorien)
@router.get("/semantic/pages")
def semantic_category_members(category: str = Query(..., description="Kategorie ohne 'Category:'")) -> Dict:
    """Run a Semantic MediaWiki ``ask`` query for a category.

    The query string is ``[[Category:<category>]]`` with ``limit=50000``
    appended. Whether members of subcategories are included is decided by
    the wiki's SMW configuration, not by this function.

    Args:
        category: Category name without the ``Category:`` prefix.

    Returns:
        The decoded JSON body of the ``ask`` response, unmodified.

    Raises:
        HTTPException: 502 if the request fails, the wiki answers with an
            HTTP error status, or the body is not valid JSON.
    """
    smw_query = f"[[Category:{category}]]"
    # Ask parameter: query string with the result limit appended.
    ask_query = f"{smw_query}|limit=50000"
    params = {
        "action": "ask",
        "query": ask_query,
        "format": "json",
    }
    try:
        r = wiki_session.get(WIKI_API_URL, params=params, timeout=20)
        r.raise_for_status()
        # Decode inside the try so a non-JSON body is reported as 502 like
        # every other upstream failure, instead of an unhandled error.
        return r.json()
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"SMW-Ask-Error: {e}") from e
# Import exercises
@router.get("/import/exercises", response_model=Dict[str,List[str]])
def import_exercises_get(category: str = Query(...)):
    """GET variant of the exercise import; delegates to ``import_exercises``.

    Args:
        category: Category whose pages should be imported.

    Returns:
        Whatever ``import_exercises`` returns for the same category.
    """
    outcome = import_exercises(category)
    return outcome
def _infobox_text(infobox, name, default=""):
    """Return the stripped value of template parameter *name*, or *default* if absent."""
    if not infobox.has(name):
        return default
    return infobox.get(name).value.strip()


def _infobox_int(infobox, name, default):
    """Return template parameter *name* as int, or *default* if absent or not numeric."""
    if not infobox.has(name):
        return default
    try:
        return int(infobox.get(name).value.strip())
    except ValueError:
        # A malformed number on one page should not abort the whole import.
        return default


def _build_exercise(infobox, page_title, category):
    """Build the JSON payload for one exercise from its infobox template."""
    if infobox.has('keywords'):
        keywords = [kw.strip() for kw in infobox.get('keywords').value.split(',')]
    else:
        keywords = []
    return {
        'title': _infobox_text(infobox, 'title', page_title),
        'summary': _infobox_text(infobox, 'summary'),
        'short_description': _infobox_text(infobox, 'short_description'),
        'keywords': keywords,
        'link': None,
        'discipline': _infobox_text(infobox, 'discipline'),
        'group': _infobox_text(infobox, 'group', None),
        'age_group': _infobox_text(infobox, 'age_group'),
        'target_group': _infobox_text(infobox, 'target_group'),
        'min_participants': _infobox_int(infobox, 'min_participants', 1),
        'duration_minutes': _infobox_int(infobox, 'duration', 0),
        'capabilities': {},
        'category': category,
        'purpose': '',
        'execution': '',
        'notes': '',
        'preparation': '',
        'method': '',
        'equipment': [],
    }


@router.post("/import/exercises", response_model=Dict[str,List[str]])
def import_exercises(category: str = Query(...)):
    """Import every exercise page of a category into the application.

    For each page of the category, the page text is parsed and the first
    ``ÜbungInfoBox`` template is turned into an exercise payload, which is
    posted to ``<APP_URL>/exercise``. Pages without that template are
    skipped.

    Args:
        category: Category name passed on to ``list_category_members``.

    Returns:
        ``{"imported": [...]}`` with the ids (as strings) returned by the
        exercise endpoint for every post that answered with status 200.

    Raises:
        HTTPException: 502 if posting to the exercise endpoint fails at the
            connection level; errors from listing or fetching pages
            propagate unchanged.
    """
    # Imported lazily so the module loads without the parser installed.
    import mwparserfromhell

    # Loop-invariant: resolve the target endpoint once.
    exercise_url = f"{os.getenv('APP_URL','http://localhost:8000')}/exercise"
    imported = []
    for p in list_category_members(category):
        pc = get_page_content(pageid=p.pageid, title=p.title)
        wikicode = mwparserfromhell.parse(pc.content)
        infobox = next(
            (t for t in wikicode.filter_templates() if t.name.strip() == "ÜbungInfoBox"),
            None,
        )
        if not infobox:
            continue
        ex = _build_exercise(infobox, p.title, category)
        try:
            # Without a timeout a stalled endpoint would block this request forever.
            resp = requests.post(exercise_url, json=ex, timeout=60)
        except requests.RequestException as e:
            raise HTTPException(status_code=502, detail=f"Import-Error: {e}") from e
        if resp.status_code == 200:
            exercise_id = resp.json().get('id')
            # The response model is a list of strings: drop missing ids and
            # normalise the rest.
            if exercise_id is not None:
                imported.append(str(exercise_id))
    return {"imported": imported}