"""
File: wiki_router.py

Description:
- Endpoints for MediaWiki integration on the local network.
- Functions:
    * /health: Checks availability of the MediaWiki API.
    * /login: Performs clientlogin and stores the session cookies.
    * /semantic/pages: Lists all exercises incl. subcategories via SMW ask.
    * /parsepage: Fetches the raw wikitext of a page via action=parse.
    * /info: Returns pageid and fullurl via a core API query.
      # CHANGED: robust 404 handling
    * /semantic/page: Returns metadata of an exercise, pageid and wikitext.
      # CHANGED: propagates 404

Version: 1.3.0
"""
# NOTE: API signatures/URLs remain UNCHANGED.
# Markers: # NEW / # CHANGED

from typing import Dict, Any, Iterator, Optional
from dataclasses import dataclass
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel
import os
import requests
from dotenv import load_dotenv

load_dotenv()

router = APIRouter(prefix="/import/wiki", tags=["wiki"])

WIKI_API_URL = os.getenv("WIKI_API_URL", "https://www.karatetrainer.de/api.php")

# Shared session so that login cookies are reused by later requests.
wiki_session = requests.Session()
wiki_session.headers.update({"User-Agent": "local-llm-wiki-proxy/1.3"})


# =====================
# Schemas
# =====================
class WikiLoginRequest(BaseModel):
    """Request body of POST /login."""

    username: str
    password: str


class WikiLoginResponse(BaseModel):
    """Response body of POST /login."""

    status: str
    message: Optional[str] = None


class PageInfoResponse(BaseModel):
    """Page id, title and full URL of a wiki page."""

    pageid: int
    title: str
    fullurl: str


class PageContentResponse(BaseModel):
    """Page id, title and raw wikitext of a wiki page."""

    pageid: int
    title: str
    wikitext: str


# =====================
# Helpers
# =====================
# NEW: title normalisation (underscore, dash variants)
_dash_variants = ("-", "–", "—")


def _normalize_titles(title: str) -> Iterator[str]:
    """Yield *title* followed by spelling variants to try against the wiki.

    The variants are: spaces replaced by underscores, and every dash
    character that occurs in the title replaced by each of the other dash
    variants (hyphen, en dash, em dash).
    """
    yield title
    # Space -> underscore
    if " " in title:
        yield title.replace(" ", "_")
    # Dash variants <-> hyphen
    for dv in _dash_variants:
        for dv2 in _dash_variants:
            if dv != dv2 and dv in title:
                yield title.replace(dv, dv2)


def _get_json(params: Dict[str, Any], *, timeout: int, error_label: str) -> Dict[str, Any]:
    """GET the wiki API and return the decoded JSON object.

    Transport errors, HTTP error statuses and undecodable bodies are all
    reported as HTTP 502 with ``"<error_label>: <cause>"`` as detail, so a
    broken upstream never surfaces as an unhandled 500.

    Raises:
        HTTPException: 502 when the request or the JSON decoding fails.
    """
    try:
        resp = wiki_session.get(WIKI_API_URL, params=params, timeout=timeout)
        resp.raise_for_status()
        payload = resp.json()
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"{error_label}: {e}") from e
    # Callers navigate the payload with .get(); anything else counts as empty.
    return payload if isinstance(payload, dict) else {}


# NEW: robust page info (None when not found)
def _fetch_pageinfo_by_title(title: str) -> Optional[PageInfoResponse]:
    """Look up page id and URL for *title*, trying normalised variants.

    Returns:
        The info of the first candidate title that exists, or ``None`` when
        no candidate resolves to an existing page.

    Raises:
        HTTPException: 502 as soon as one upstream request fails; remaining
            candidates are not tried in that case.
    """
    params = {
        "action": "query",
        "format": "json",
        "prop": "info",
        "inprop": "url",
        "redirects": 1,  # follow redirects
    }
    for candidate in _normalize_titles(title):
        payload = _get_json({**params, "titles": candidate}, timeout=10, error_label="Info-Error")
        query = payload.get("query")
        pages = (query.get("pages") if isinstance(query, dict) else None) or {}
        if not isinstance(pages, dict) or not pages:
            continue
        # MediaWiki returns a dict {pageid(str): {..}}; only the first entry is used.
        pid_str, page = next(iter(pages.items()))
        # Missing page?
        if page.get("missing") is not None or str(pid_str) == "-1":
            continue
        title_out = page.get("title") or candidate
        fullurl = page.get("fullurl") or page.get("canonicalurl") or ""
        try:
            pid = int(pid_str)
        except ValueError:
            pid = int(page.get("pageid", -1))
        return PageInfoResponse(pageid=pid, title=title_out, fullurl=fullurl)
    return None


# =====================
# Endpoints
# =====================
@router.get("/health")
def health() -> Dict[str, str]:
    """Return ``{"status": "ok"}`` when the wiki API answers a siteinfo query.

    Raises:
        HTTPException: 502 when the request fails or returns an error status.
    """
    try:
        r = wiki_session.get(
            WIKI_API_URL,
            params={"action": "query", "meta": "siteinfo", "format": "json"},
            timeout=5,
        )
        r.raise_for_status()
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Wiki nicht erreichbar: {e}") from e
    return {"status": "ok"}


@router.post("/login", response_model=WikiLoginResponse)
def login(data: WikiLoginRequest):
    """Log in via ``action=clientlogin``; cookies stay in ``wiki_session``.

    Raises:
        HTTPException: 502 when fetching the token or posting the login
            fails, or when no token is returned; 401 when the wiki reports a
            status other than ``PASS``.
    """
    # Fetch the login token
    try:
        token_resp = wiki_session.get(
            WIKI_API_URL,
            params={"action": "query", "meta": "tokens", "type": "login", "format": "json"},
            timeout=10,
        )
        token_resp.raise_for_status()
        token = token_resp.json().get("query", {}).get("tokens", {}).get("logintoken")
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Token-Error: {e}") from e
    if not token:
        raise HTTPException(status_code=502, detail="Kein Login-Token erhalten")

    # clientlogin
    try:
        login_resp = wiki_session.post(
            WIKI_API_URL,
            data={
                "action": "clientlogin",
                "format": "json",
                "username": data.username,
                "password": data.password,
                "logintoken": token,
            },
            timeout=15,
        )
        login_resp.raise_for_status()
        status = login_resp.json().get("clientlogin", {}).get("status")
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Login-Error: {e}") from e
    # Checked outside the try so the 401 is never rewrapped as a 502.
    if status != "PASS":
        raise HTTPException(status_code=401, detail=f"Login fehlgeschlagen: {status}")
    return WikiLoginResponse(status="success")


@router.get("/semantic/pages")
def semantic_pages(category: str = Query(..., description="Kategorie ohne 'Category:'")) -> Dict[str, Any]:
    """Return the SMW ask results for all pages in *category*.

    Returns:
        The ``query.results`` object of the ask response; an empty dict when
        it is absent or not a JSON object.

    Raises:
        HTTPException: 502 when the upstream request or JSON decoding fails.
    """
    smw_query = f"[[Category:{category}]]"
    ask_query = f"{smw_query}|limit=50000"
    payload = _get_json(
        {"action": "ask", "query": ask_query, "format": "json"},
        timeout=30,
        error_label="SMW-Ask-Error",
    )
    query = payload.get("query")
    results = query.get("results") if isinstance(query, dict) else None
    # An empty result set may be serialised as a JSON list instead of an
    # object; normalise so callers can always rely on dict methods.
    return results if isinstance(results, dict) else {}


@router.get("/parsepage", response_model=PageContentResponse)
def parse_page(pageid: int = Query(...), title: Optional[str] = Query(None)):
    """Fetch the raw wikitext of the page with id *pageid*.

    *title* is only echoed back in the response (empty string when omitted).

    Raises:
        HTTPException: 404 when the wiki reports that the page does not
            exist; 502 for any other upstream failure or API error.
    """
    payload = _get_json(
        {"action": "parse", "pageid": pageid, "prop": "wikitext", "format": "json"},
        timeout=20,
        error_label="Parse-Error",
    )
    # MediaWiki reports API-level failures with HTTP 200 and an "error" object;
    # without this check an unknown pageid would yield an empty wikitext.
    error = payload.get("error")
    if error:
        code = error.get("code") if isinstance(error, dict) else None
        if code in ("nosuchpageid", "missingtitle"):
            raise HTTPException(status_code=404, detail=f"Page not found: {pageid}")
        info = error.get("info", code) if isinstance(error, dict) else error
        raise HTTPException(status_code=502, detail=f"Parse-Error: {info}")
    wikitext = payload.get("parse", {}).get("wikitext", {}).get("*", "")
    return PageContentResponse(pageid=pageid, title=title or "", wikitext=wikitext)


# CHANGED: robust /info with 404 instead of 500 for unknown titles
@router.get("/info", response_model=PageInfoResponse)
def page_info(title: str = Query(..., description="Seitentitel")):
    """Return pageid, title and fullurl for *title*.

    Raises:
        HTTPException: 404 when no title variant resolves to an existing
            page; 502 when the upstream request fails.
    """
    result = _fetch_pageinfo_by_title(title)
    if not result:
        # clean 404 instead of StopIteration/500
        raise HTTPException(status_code=404, detail=f"Page not found: {title}")
    return result


# CHANGED: /semantic/page propagates 404 cleanly
@router.get("/semantic/page")
def semantic_page(title: str = Query(...)) -> Dict[str, Any]:
    """Return SMW printouts, page id, URL and wikitext of one exercise.

    The exercise is looked up by *title* among the SMW ask results of the
    category "Übungen"; page info and wikitext are then fetched through
    ``page_info`` and ``parse_page``.

    Raises:
        HTTPException: 404 when the title is not in the SMW result or the
            page is unknown to the wiki; other HTTP errors raised by the
            called endpoints propagate unchanged.
    """
    # Fetch the SMW printouts.
    # If the title -> category mapping differs, adjust it here.
    entries = semantic_pages(category="Übungen")
    # The results may not be a JSON object (e.g. an empty result set delivered
    # as a list); treat that as "nothing found" instead of failing on .get().
    if not isinstance(entries, dict):
        entries = {}
    entry = entries.get(title)
    if not entry:
        raise HTTPException(status_code=404, detail="Übung nicht gefunden im SMW-Ask-Ergebnis.")

    # Fetch page info and wikitext.
    info = page_info(title=title)  # raises 404 when unknown
    parsed = parse_page(pageid=info.pageid, title=title)
    return {
        "title": title,
        "pageid": info.pageid,
        "fullurl": info.fullurl,
        "printouts": entry.get("printouts", {}),
        "wikitext": parsed.wikitext,
    }