llm-api/wiki_router.py aktualisiert
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s
All checks were successful
Deploy Trainer_LLM to llm-node / deploy (push) Successful in 2s
This commit is contained in:
parent
508fafd0df
commit
9327bc48d8
|
|
@ -1,20 +1,16 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
"""
|
"""
|
||||||
wiki_router.py – v1.4.2 (Swagger angereichert)
|
wiki_router.py – v1.4.3 (Swagger + robustes .env + optionaler ENV-Login)
|
||||||
|
|
||||||
Änderungen ggü. v1.4.1:
|
Änderungen ggü. v1.4.2:
|
||||||
- Alle Endpunkte mit aussagekräftigem `summary`/`description`/`response_description` versehen
|
- **/login/env** hinzugefügt: Login mit WIKI_BOT_USER/WIKI_BOT_PASSWORD aus ENV (Secrets werden nie ausgegeben)
|
||||||
- Parameter-Beschreibungen ergänzt (z. B. `verbose`, `category`, `title`)
|
- .env-Bootstrap robuster und **vor** dem ersten Aufruf geloggt
|
||||||
- Beispiele über `x-codeSamples` (cURL) und `json_schema_extra`
|
- /meta/env/runtime um Credentials-Flags ergänzt (ohne Klartext)
|
||||||
- **Keine API-Signaturänderungen**
|
- response_description-Strings mit JSON-Beispielen sauber gequotet
|
||||||
|
- Keine Breaking-Changes (Signaturen & Pfade unverändert)
|
||||||
|
|
||||||
Ziele:
|
Prefix-Hinweis:
|
||||||
- /semantic/pages reichert pageid/fullurl für ALLE Titel batchweise an (redirects=1, converttitles=1)
|
- Der Router setzt `prefix="/import/wiki"`. In `llm_api.py` **ohne** weiteren Prefix einbinden.
|
||||||
- /info robust: 404 statt 500, mit Titel-Varianten (Leerzeichen/Unterstrich/Bindestrich)
|
|
||||||
- Wiederholungen & Throttling gegen MediaWiki (WIKI_RETRIES, WIKI_SLEEP_MS)
|
|
||||||
- Optional: Diagnose-Ausgaben (verbose) und Coverage-Kennzahlen (Logs)
|
|
||||||
|
|
||||||
Hinweis Prefix:
|
|
||||||
- Der Router setzt `prefix="/import/wiki"`. In `llm_api.py` **ohne** weiteren Prefix einbinden, sonst entstehen Doppelpfade.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Dict, Any, Optional, List
|
from typing import Dict, Any, Optional, List
|
||||||
|
|
@ -23,17 +19,64 @@ from pydantic import BaseModel, Field
|
||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
import os, time, logging
|
import os, time, logging
|
||||||
import requests
|
import requests
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv, find_dotenv
|
||||||
from starlette.responses import PlainTextResponse
|
from starlette.responses import PlainTextResponse
|
||||||
|
|
||||||
load_dotenv()
|
# -------------------------------------------------
|
||||||
|
# Logging **vor** .env-Bootstrap initialisieren
|
||||||
|
# -------------------------------------------------
|
||||||
logger = logging.getLogger("wiki_router")
|
logger = logging.getLogger("wiki_router")
|
||||||
logger.setLevel(logging.INFO)
|
logger.setLevel(logging.INFO)
|
||||||
|
|
||||||
|
# -------------------------------------------------
|
||||||
|
# Robustes .env-Loading (findet Datei auch außerhalb des CWD)
|
||||||
|
# -------------------------------------------------
|
||||||
|
|
||||||
|
def _bootstrap_env() -> Optional[str]:
    """Load the first usable ``.env`` file and report where it came from.

    Candidates are tried in this order; the first existing file for which
    ``load_dotenv`` reports success wins and ends the search:

    1. the path named by the ``LLMAPI_ENV_FILE`` environment variable
    2. the result of ``find_dotenv(".env", usecwd=True)``
    3. ``<cwd>/.env``
    4. ``.env`` in the directory of this module
    5. ``~/.env``
    6. ``~/.llm-api.env``
    7. ``/etc/llm-api.env``

    Files are loaded with ``override=False``. A failure while probing or
    loading one candidate is logged as a warning and the search continues
    with the next one (deliberate best effort: a broken file must not
    prevent the module from being imported).

    Returns:
        The path of the file that was loaded, or ``None`` when no candidate
        could be loaded and only the process environment is in effect.
    """
    candidates: List[str] = []

    explicit = os.getenv("LLMAPI_ENV_FILE")
    if explicit:
        candidates.append(explicit)
        # An explicitly configured file that is missing is almost certainly a
        # misconfiguration; say so instead of silently falling through.
        if not os.path.exists(explicit):
            logger.warning(
                "wiki_router: LLMAPI_ENV_FILE=%s existiert nicht – wird übersprungen",
                explicit,
            )

    discovered = find_dotenv(".env", usecwd=True)
    if discovered:
        candidates.append(discovered)

    candidates += [
        os.path.join(os.getcwd(), ".env"),
        os.path.join(os.path.dirname(__file__), ".env"),
        os.path.expanduser("~/.env"),
        os.path.expanduser("~/.llm-api.env"),
        "/etc/llm-api.env",
    ]

    # The same path can appear more than once (e.g. find_dotenv() and
    # <cwd>/.env); try each distinct path only once, keeping priority order.
    seen = set()
    for path in candidates:
        if not path or path in seen:
            continue
        seen.add(path)
        try:
            if os.path.exists(path) and load_dotenv(path, override=False):
                logger.info("wiki_router: .env geladen aus %s", path)
                return path
        except Exception as e:
            logger.warning("wiki_router: .env laden fehlgeschlagen (%s): %s", path, e)

    logger.info("wiki_router: keine .env gefunden – verwende Prozess-Umgebung")
    return None
|
||||||
|
|
||||||
|
_BOOTSTRAP_ENV = _bootstrap_env()
|
||||||
|
|
||||||
|
# -------------------------------------------------
|
||||||
|
# Router & Konfiguration
|
||||||
|
# -------------------------------------------------
|
||||||
router = APIRouter(prefix="/import/wiki", tags=["wiki"])
|
router = APIRouter(prefix="/import/wiki", tags=["wiki"])
|
||||||
|
|
||||||
# -------- Konfiguration --------
|
# Hinweis: Werte werden NACH dem .env-Bootstrap aus os.environ gelesen.
|
||||||
|
# Änderungen an .env erfordern i. d. R. einen Neustart des Dienstes.
|
||||||
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
|
WIKI_API_URL = os.getenv("WIKI_API_URL", "https://karatetrainer.net/api.php")
|
||||||
WIKI_TIMEOUT = float(os.getenv("WIKI_TIMEOUT", "15"))
|
WIKI_TIMEOUT = float(os.getenv("WIKI_TIMEOUT", "15"))
|
||||||
WIKI_BATCH = int(os.getenv("WIKI_BATCH", "50"))
|
WIKI_BATCH = int(os.getenv("WIKI_BATCH", "50"))
|
||||||
|
|
@ -42,18 +85,15 @@ WIKI_SLEEPMS = int(os.getenv("WIKI_SLEEP_MS", "0")) # Throttle zwischen Requ
|
||||||
|
|
||||||
# Single Session (Cookies für Login)
|
# Single Session (Cookies für Login)
|
||||||
wiki_session = requests.Session()
|
wiki_session = requests.Session()
|
||||||
wiki_session.headers.update({"User-Agent": "local-llm-wiki-proxy/1.4.2"})
|
wiki_session.headers.update({"User-Agent": "local-llm-wiki-proxy/1.4.3"})
|
||||||
|
|
||||||
# -------- Schemas --------
|
# -------------------------------------------------
|
||||||
|
# Schemas
|
||||||
|
# -------------------------------------------------
|
||||||
# Request body for a MediaWiki login with explicitly supplied credentials.
# Both fields are required; the example below is attached to the JSON schema.
class WikiLoginRequest(BaseModel):
    username: str = Field(..., description="MediaWiki-Benutzername (kein .env-Wert)")
    password: str = Field(..., description="MediaWiki-Passwort (kein .env-Wert)")

    # Example payload; the password is shown masked on purpose.
    model_config = {
        "json_schema_extra": {
            "example": {
                "username": "Bot",
                "password": "••••••",
            }
        }
    }
|
|
||||||
|
|
||||||
class WikiLoginResponse(BaseModel):
|
class WikiLoginResponse(BaseModel):
|
||||||
status: str = Field(..., description="'success' bei erfolgreichem Login")
|
status: str = Field(..., description="'success' bei erfolgreichem Login")
|
||||||
|
|
@ -63,25 +103,17 @@ class PageInfoResponse(BaseModel):
|
||||||
pageid: int = Field(..., description="Eindeutige PageID der MediaWiki-Seite")
|
pageid: int = Field(..., description="Eindeutige PageID der MediaWiki-Seite")
|
||||||
title: str = Field(..., description="Aufgelöster Titel (kann von Eingabe abweichen, z. B. Redirect/Normalize)")
|
title: str = Field(..., description="Aufgelöster Titel (kann von Eingabe abweichen, z. B. Redirect/Normalize)")
|
||||||
fullurl: str = Field(..., description="Kanonsiche URL zur Seite")
|
fullurl: str = Field(..., description="Kanonsiche URL zur Seite")
|
||||||
|
model_config = {"json_schema_extra": {"example": {"pageid": 218, "title": "Affenklatschen", "fullurl": "https://…/index.php?title=Affenklatschen"}}}
|
||||||
model_config = {
|
|
||||||
"json_schema_extra": {
|
|
||||||
"example": {"pageid": 218, "title": "Affenklatschen", "fullurl": "https://…/index.php?title=Affenklatschen"}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Response schema carrying a page id, a title and the page's raw wikitext.
# All three fields are required.
class PageContentResponse(BaseModel):
    pageid: int = Field(..., description="PageID der angefragten Seite")
    title: str = Field(..., description="Echo des mitgegebenen Titels (optional)")
    wikitext: str = Field(..., description="Roh-Wikitext (inkl. Templates), keine Sanitization")

    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "example": {
                "pageid": 218,
                "title": "Affenklatschen",
                "wikitext": "{{ÜbungInfoBox|…}}",
            }
        }
    }
|
||||||
|
|
||||||
model_config = {
|
# -------------------------------------------------
|
||||||
"json_schema_extra": {
|
# Utils
|
||||||
"example": {"pageid": 218, "title": "Affenklatschen", "wikitext": "{{ÜbungInfoBox|…}}"}
|
# -------------------------------------------------
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# -------- Utils --------
|
|
||||||
|
|
||||||
def _sleep():
|
def _sleep():
|
||||||
if WIKI_SLEEPMS > 0:
|
if WIKI_SLEEPMS > 0:
|
||||||
|
|
@ -164,7 +196,9 @@ def _fetch_pageinfo_batch(titles: List[str]) -> Dict[str, Dict[str, Any]]:
|
||||||
_sleep()
|
_sleep()
|
||||||
return out
|
return out
|
||||||
|
|
||||||
# -------- Doku-Konstanten (Markdown/.env) --------
|
# -------------------------------------------------
|
||||||
|
# Doku-Konstanten (Markdown/.env)
|
||||||
|
# -------------------------------------------------
|
||||||
MANUAL_WIKI_IMPORTER = dedent("""
|
MANUAL_WIKI_IMPORTER = dedent("""
|
||||||
# wiki_importer.py – Kurzanleitung
|
# wiki_importer.py – Kurzanleitung
|
||||||
|
|
||||||
|
|
@ -215,9 +249,14 @@ ENV_DOC = [
|
||||||
{"name": "WIKI_TIMEOUT", "desc": "Timeout in Sekunden (Default 15)"},
|
{"name": "WIKI_TIMEOUT", "desc": "Timeout in Sekunden (Default 15)"},
|
||||||
{"name": "WIKI_RETRIES", "desc": "Anzahl zusätzlicher Versuche (Default 1)"},
|
{"name": "WIKI_RETRIES", "desc": "Anzahl zusätzlicher Versuche (Default 1)"},
|
||||||
{"name": "WIKI_SLEEP_MS", "desc": "Throttle zwischen Requests in Millisekunden (Default 0)"},
|
{"name": "WIKI_SLEEP_MS", "desc": "Throttle zwischen Requests in Millisekunden (Default 0)"},
|
||||||
|
{"name": "WIKI_BATCH", "desc": "Batchgröße für Titel-Enrichment (Default 50)"},
|
||||||
|
{"name": "WIKI_BOT_USER", "desc": "(optional) Benutzername für /login/env – **Wert wird nie im Klartext zurückgegeben**"},
|
||||||
|
{"name": "WIKI_BOT_PASSWORD", "desc": "(optional) Passwort für /login/env – **Wert wird nie im Klartext zurückgegeben**"},
|
||||||
]
|
]
|
||||||
|
|
||||||
# -------- Endpoints --------
|
# -------------------------------------------------
|
||||||
|
# Doku-/Meta-Endpunkte
|
||||||
|
# -------------------------------------------------
|
||||||
@router.get(
|
@router.get(
|
||||||
"/manual/wiki_importer",
|
"/manual/wiki_importer",
|
||||||
summary="Handbuch: wiki_importer.py (Markdown)",
|
summary="Handbuch: wiki_importer.py (Markdown)",
|
||||||
|
|
@ -226,18 +265,10 @@ ENV_DOC = [
|
||||||
response_description="Markdown-Text.",
|
response_description="Markdown-Text.",
|
||||||
openapi_extra={
|
openapi_extra={
|
||||||
"x-codeSamples": [
|
"x-codeSamples": [
|
||||||
{
|
{"lang": "bash", "label": "Vollimport (Standard)", "source": "python3 wiki_importer.py --all"},
|
||||||
"lang": "bash",
|
{"lang": "bash", "label": "Dry-Run + Kategorie", "source": "python3 wiki_importer.py --all --category \"Übungen\" --dry-run"},
|
||||||
"label": "Vollimport (Standard)",
|
|
||||||
"source": "python3 wiki_importer.py --all"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"lang": "bash",
|
|
||||||
"label": "Dry-Run + Kategorie",
|
|
||||||
"source": "python3 wiki_importer.py --all --category \"Übungen\" --dry-run"
|
|
||||||
}
|
|
||||||
]
|
]
|
||||||
}
|
},
|
||||||
)
|
)
|
||||||
def manual_wiki_importer():
|
def manual_wiki_importer():
|
||||||
return MANUAL_WIKI_IMPORTER
|
return MANUAL_WIKI_IMPORTER
|
||||||
|
|
@ -252,7 +283,30 @@ def manual_wiki_importer():
|
||||||
def meta_env() -> List[Dict[str, str]]:
|
def meta_env() -> List[Dict[str, str]]:
|
||||||
return ENV_DOC
|
return ENV_DOC
|
||||||
|
|
||||||
# -------- Endpoints --------
|
|
||||||
|
@router.get(
    "/meta/env/runtime",
    summary=".env Runtime (wirksame Werte)",
    description="Zeigt die aktuell wirksamen Konfigurationswerte für den Wiki-Router (ohne Secrets) und die geladene .env-Quelle.",
    response_description="Objekt mit 'loaded_from' und 'env' (Key→Value).",
)
def meta_env_runtime() -> Dict[str, Any]:
    """Report the router's configuration as seen in the process environment.

    Returns:
        A dict with three entries:

        * ``loaded_from`` – the value of ``_BOOTSTRAP_ENV`` (path of the
          ``.env`` file loaded at import time, or ``None``).
        * ``env`` – the raw ``os.getenv`` value of each non-secret ``WIKI_*``
          setting, read at request time. A variable that is not set is
          reported as ``None`` here, even where the module-level constant
          falls back to a default.
        * ``credentials`` – booleans only: whether ``WIKI_BOT_USER`` and
          ``WIKI_BOT_PASSWORD`` are set (non-empty) and whether both are.
          The credential values themselves are never included.
    """
    user_present = bool(os.getenv("WIKI_BOT_USER"))
    password_present = bool(os.getenv("WIKI_BOT_PASSWORD"))

    # Snapshot of the non-secret settings, in a fixed key order.
    env_snapshot: Dict[str, Any] = {}
    for name in ("WIKI_API_URL", "WIKI_TIMEOUT", "WIKI_RETRIES", "WIKI_SLEEP_MS", "WIKI_BATCH"):
        env_snapshot[name] = os.getenv(name)

    credential_flags = {
        "WIKI_BOT_USER_set": user_present,
        "WIKI_BOT_PASSWORD_set": password_present,
        "ready_for_login_env": user_present and password_present,
    }

    return {
        "loaded_from": _BOOTSTRAP_ENV,
        "env": env_snapshot,
        "credentials": credential_flags,
    }
|
||||||
|
|
||||||
|
# -------------------------------------------------
|
||||||
|
# API-Endpunkte
|
||||||
|
# -------------------------------------------------
|
||||||
@router.get(
|
@router.get(
|
||||||
"/health",
|
"/health",
|
||||||
summary="Ping & Site-Info des MediaWiki-Upstreams",
|
summary="Ping & Site-Info des MediaWiki-Upstreams",
|
||||||
|
|
@ -268,12 +322,12 @@ def meta_env() -> List[Dict[str, str]]:
|
||||||
**Hinweis**: Je nach Wiki-Konfiguration sind detaillierte Infos (Generator/Sitename) nur **nach Login** sichtbar.
|
**Hinweis**: Je nach Wiki-Konfiguration sind detaillierte Infos (Generator/Sitename) nur **nach Login** sichtbar.
|
||||||
"""
|
"""
|
||||||
),
|
),
|
||||||
response_description="`{\"status\":\"ok\"}` oder mit `wiki.sitename/generator` bei `verbose=1`.",
|
response_description='`{"status":"ok"}` oder mit `wiki.sitename/generator` bei `verbose=1`.',
|
||||||
openapi_extra={
|
openapi_extra={
|
||||||
"x-codeSamples": [
|
"x-codeSamples": [
|
||||||
{"lang": "bash", "label": "curl", "source": "curl -s 'http://localhost:8000/import/wiki/health?verbose=1' | jq ."}
|
{"lang": "bash", "label": "curl", "source": "curl -s 'http://localhost:8000/import/wiki/health?verbose=1' | jq ."}
|
||||||
]
|
]
|
||||||
}
|
},
|
||||||
)
|
)
|
||||||
def health(verbose: Optional[int] = Query(default=0, description="1 = Site-Metadaten (sitename/generator) mitsenden")) -> Dict[str, Any]:
|
def health(verbose: Optional[int] = Query(default=0, description="1 = Site-Metadaten (sitename/generator) mitsenden")) -> Dict[str, Any]:
|
||||||
resp = _request_with_retry("GET", {"action": "query", "meta": "siteinfo", "format": "json"})
|
resp = _request_with_retry("GET", {"action": "query", "meta": "siteinfo", "format": "json"})
|
||||||
|
|
@ -300,7 +354,7 @@ def health(verbose: Optional[int] = Query(default=0, description="1 = Site-Metad
|
||||||
- Respektiert Retry/Throttle aus `.env`.
|
- Respektiert Retry/Throttle aus `.env`.
|
||||||
"""
|
"""
|
||||||
),
|
),
|
||||||
response_description="`{\"status\":\"success\"}` bei Erfolg."
|
response_description='`{"status":"success"}` bei Erfolg.',
|
||||||
)
|
)
|
||||||
def login(data: WikiLoginRequest):
|
def login(data: WikiLoginRequest):
|
||||||
# Token holen
|
# Token holen
|
||||||
|
|
@ -338,6 +392,33 @@ def login(data: WikiLoginRequest):
|
||||||
raise HTTPException(status_code=401, detail=f"Login fehlgeschlagen: {res}")
|
raise HTTPException(status_code=401, detail=f"Login fehlgeschlagen: {res}")
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
    "/login/env",
    response_model=WikiLoginResponse,
    summary="MediaWiki-Login mit .env-Credentials",
    description=dedent(
        """
        Führt den Login mit **WIKI_BOT_USER/WIKI_BOT_PASSWORD** aus der Prozess-Umgebung durch.
        Praktisch für geplante Jobs/CLI ohne Übergabe im Body. Secrets werden **nie** im Klartext zurückgegeben.

        **Voraussetzung**: Beide Variablen sind gesetzt (siehe `/import/wiki/meta/env/runtime`).
        """
    ),
    response_description='`{"status":"success"}` bei Erfolg.',
    openapi_extra={
        "x-codeSamples": [
            {"lang": "bash", "label": "curl", "source": "curl -s -X POST http://localhost:8000/import/wiki/login/env | jq ."}
        ]
    },
)
def login_env():
    """Log in with the credentials found in the process environment.

    Reads ``WIKI_BOT_USER`` and ``WIKI_BOT_PASSWORD`` at request time and
    hands them to ``login`` wrapped in a ``WikiLoginRequest``.

    Returns:
        Whatever ``login`` returns for these credentials.

    Raises:
        HTTPException: status 400 when either variable is unset or empty.
    """
    credentials = {
        "username": os.getenv("WIKI_BOT_USER"),
        "password": os.getenv("WIKI_BOT_PASSWORD"),
    }
    # Both values must be present and non-empty before attempting a login.
    if not all(credentials.values()):
        raise HTTPException(status_code=400, detail="WIKI_BOT_USER/WIKI_BOT_PASSWORD nicht gesetzt")
    return login(WikiLoginRequest(**credentials))
|
||||||
|
|
||||||
|
|
||||||
@router.get(
|
@router.get(
|
||||||
"/semantic/pages",
|
"/semantic/pages",
|
||||||
summary="SMW-Ask-Ergebnisse einer Kategorie mit PageID/URL anreichern",
|
summary="SMW-Ask-Ergebnisse einer Kategorie mit PageID/URL anreichern",
|
||||||
|
|
@ -359,16 +440,14 @@ def login(data: WikiLoginRequest):
|
||||||
"x-codeSamples": [
|
"x-codeSamples": [
|
||||||
{"lang": "bash", "label": "curl", "source": "curl -s 'http://localhost:8000/import/wiki/semantic/pages?category=%C3%9Cbungen' | jq . | head"}
|
{"lang": "bash", "label": "curl", "source": "curl -s 'http://localhost:8000/import/wiki/semantic/pages?category=%C3%9Cbungen' | jq . | head"}
|
||||||
]
|
]
|
||||||
}
|
},
|
||||||
)
|
)
|
||||||
def semantic_pages(category: str = Query(..., description="Kategorie-Name **ohne** 'Category:' Präfix")) -> Dict[str, Any]:
|
def semantic_pages(category: str = Query(..., description="Kategorie-Name **ohne** 'Category:' Präfix")) -> Dict[str, Any]:
|
||||||
# Rohdaten aus SMW (Ask)
|
|
||||||
ask_query = f"[[Category:{category}]]|limit=50000"
|
ask_query = f"[[Category:{category}]]|limit=50000"
|
||||||
r = _request_with_retry("GET", {"action": "ask", "query": ask_query, "format": "json"})
|
r = _request_with_retry("GET", {"action": "ask", "query": ask_query, "format": "json"})
|
||||||
results = r.json().get("query", {}).get("results", {}) or {}
|
results = r.json().get("query", {}).get("results", {}) or {}
|
||||||
titles = list(results.keys())
|
titles = list(results.keys())
|
||||||
|
|
||||||
# Batch-Anreicherung mit pageid/fullurl für ALLE Titel
|
|
||||||
info_map = _fetch_pageinfo_batch(titles)
|
info_map = _fetch_pageinfo_batch(titles)
|
||||||
|
|
||||||
enriched: Dict[str, Any] = {}
|
enriched: Dict[str, Any] = {}
|
||||||
|
|
@ -408,7 +487,7 @@ def semantic_pages(category: str = Query(..., description="Kategorie-Name **ohne
|
||||||
"x-codeSamples": [
|
"x-codeSamples": [
|
||||||
{"lang": "bash", "label": "curl", "source": "curl -s 'http://localhost:8000/import/wiki/parsepage?pageid=218&title=Affenklatschen' | jq ."}
|
{"lang": "bash", "label": "curl", "source": "curl -s 'http://localhost:8000/import/wiki/parsepage?pageid=218&title=Affenklatschen' | jq ."}
|
||||||
]
|
]
|
||||||
}
|
},
|
||||||
)
|
)
|
||||||
def parse_page(pageid: int = Query(..., description="Numerische PageID der Seite"), title: str = Query(None, description="Optional: Seitentitel (nur Echo)")):
|
def parse_page(pageid: int = Query(..., description="Numerische PageID der Seite"), title: str = Query(None, description="Optional: Seitentitel (nur Echo)")):
|
||||||
resp = _request_with_retry("GET", {"action": "parse", "pageid": pageid, "prop": "wikitext", "format": "json"})
|
resp = _request_with_retry("GET", {"action": "parse", "pageid": pageid, "prop": "wikitext", "format": "json"})
|
||||||
|
|
@ -436,16 +515,14 @@ def parse_page(pageid: int = Query(..., description="Numerische PageID der Seite
|
||||||
"x-codeSamples": [
|
"x-codeSamples": [
|
||||||
{"lang": "bash", "label": "curl", "source": "curl -s 'http://localhost:8000/import/wiki/info?title=Affenklatschen' | jq ."}
|
{"lang": "bash", "label": "curl", "source": "curl -s 'http://localhost:8000/import/wiki/info?title=Affenklatschen' | jq ."}
|
||||||
]
|
]
|
||||||
}
|
},
|
||||||
)
|
)
|
||||||
def page_info(title: str = Query(..., description="Seitentitel (unscharf; Varianten werden versucht)")):
|
def page_info(title: str = Query(..., description="Seitentitel (unscharf; Varianten werden versucht)")):
|
||||||
# 1. Versuch: wie geliefert, mit redirects/converttitles
|
|
||||||
res = _fetch_pageinfo_batch([title])
|
res = _fetch_pageinfo_batch([title])
|
||||||
if res.get(title):
|
if res.get(title):
|
||||||
d = res[title]
|
d = res[title]
|
||||||
return PageInfoResponse(pageid=d["pageid"], title=title, fullurl=d.get("fullurl", ""))
|
return PageInfoResponse(pageid=d["pageid"], title=title, fullurl=d.get("fullurl", ""))
|
||||||
|
|
||||||
# 2. Varianten probieren
|
|
||||||
for v in _normalize_variants(title):
|
for v in _normalize_variants(title):
|
||||||
if v == title:
|
if v == title:
|
||||||
continue
|
continue
|
||||||
|
|
@ -454,5 +531,4 @@ def page_info(title: str = Query(..., description="Seitentitel (unscharf; Varian
|
||||||
d = res2[v]
|
d = res2[v]
|
||||||
return PageInfoResponse(pageid=d["pageid"], title=v, fullurl=d.get("fullurl", ""))
|
return PageInfoResponse(pageid=d["pageid"], title=v, fullurl=d.get("fullurl", ""))
|
||||||
|
|
||||||
# 3. sauber 404
|
|
||||||
raise HTTPException(status_code=404, detail=f"Page not found: {title}")
|
raise HTTPException(status_code=404, detail=f"Page not found: {title}")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user