diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..c07033f
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,3 @@
+# OpenRouter API-Key für Scout (LLM-Auswertung der Links)
+# Erstellen: https://openrouter.ai/keys
+OPENROUTER_API_KEY=sk-or-v1-
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..96d4112
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+.venv/
+venv/
+
+# Environment variables (IMPORTANT: never commit secrets!)
+.env
+
+# Docker & Playwright
+.pytest_cache/
+browser_data/
diff --git a/Dockerfile.worker b/Dockerfile.worker
new file mode 100644
index 0000000..54b66b6
--- /dev/null
+++ b/Dockerfile.worker
@@ -0,0 +1,25 @@
+# CIA Scout module: FastAPI + Playwright (Chromium)
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Minimal system packages only; Chromium's shared libraries are installed
+# by `playwright install --with-deps` below, so no manual apt list is needed.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates fonts-liberation \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Install the Chromium browser together with its system dependencies.
+RUN playwright install --with-deps chromium
+
+COPY src/ ./src/
+WORKDIR /app/src
+
+# FastAPI port
+EXPOSE 8000
+
+ENV PYTHONUNBUFFERED=1
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..dc02254
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,13 @@
+# CIA Scout module: FastAPI service on port 8000
+services:
+  scout:
+    build:
+      context: .
+      dockerfile: Dockerfile.worker
+    ports:
+      - "8000:8000"
+    # .env provides OPENROUTER_API_KEY; a separate `environment:` entry
+    # would be redundant, since env_file already injects it into the container.
+    env_file:
+      - .env
+    restart: unless-stopped
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..67eaf9f
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+# Scout-Modul CIA – Abhängigkeiten
+fastapi>=0.109.0
+uvicorn[standard]>=0.27.0
+playwright>=1.41.0
+beautifulsoup4>=4.12.0
+httpx>=0.26.0
+python-dotenv>=1.0.0
+pydantic>=2.5.0
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..332422c
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,61 @@
+"""
+CIA Scout module: FastAPI service that discovers publication/insights URLs per domain.
+"""
+import os
+
+from dotenv import load_dotenv
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+
+from scout_logic import get_publication_url
+
+# Pull environment variables from .env (provides OPENROUTER_API_KEY).
+load_dotenv()
+
+app = FastAPI(
+    title="CIA Scout",
+    description="Erkennt Publikations-/Insights-Seiten von Beratungsdomains.",
+    version="0.1.0",
+)
+
+
+class DiscoverRequest(BaseModel):
+    """Request body for the /discover endpoint."""
+
+    domain: str = Field(..., min_length=1, description="Domain, z.B. mckinsey.com")
+
+
+class DiscoverResponse(BaseModel):
+    """Response payload: the discovered publication URL, or an error message."""
+
+    url: str | None = Field(None, description="Gefundene absolute URL für Reports/Insights")
+    error: str | None = Field(None, description="Fehlermeldung, falls kein Ergebnis")
+
+
+@app.get("/health")
+async def health():
+    """Liveness probe."""
+    return {"status": "ok"}
+
+
+@app.post("/discover", response_model=DiscoverResponse)
+async def discover(body: DiscoverRequest) -> DiscoverResponse:
+    """
+    Accept a domain, scan its start page with Playwright, extract the links,
+    and let OpenRouter pick the most likely publications URL.
+    """
+    # Fail fast with 503 when the LLM backend could not be called anyway.
+    if not os.getenv("OPENROUTER_API_KEY"):
+        raise HTTPException(
+            status_code=503,
+            detail="OPENROUTER_API_KEY nicht gesetzt (z.B. in .env)",
+        )
+
+    result = await get_publication_url(body.domain)
+    return DiscoverResponse(url=result.get("url"), error=result.get("error"))
+
+
+if __name__ == "__main__":
+    # Local development entry point; inside Docker uvicorn is started via CMD.
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/src/scout_logic.py b/src/scout_logic.py
new file mode 100644
index 0000000..454b7cc
--- /dev/null
+++ b/src/scout_logic.py
@@ -0,0 +1,145 @@
+"""
+Scout logic: scan a domain, extract its links, and identify the publications URL via OpenRouter.
+"""
+import json
+import os
+import re
+from typing import Any
+from urllib.parse import urljoin
+
+import httpx
+from playwright.async_api import async_playwright
+
+# OpenRouter base URL and default model
+OPENROUTER_BASE = "https://openrouter.ai/api/v1"
+DEFAULT_MODEL = "google/gemini-flash-1.5-8b"
+
+
+async def _fetch_links_with_playwright(domain: str) -> list[dict[str, str]]:
+    """
+    Load the domain's start page with headless Playwright and extract
+    all <a> tags (text and href).
+    """
+    # Normalize the domain so it carries a scheme.
+    url = domain if domain.startswith("http") else f"https://{domain}"
+    links: list[dict[str, str]] = []
+
+    async with async_playwright() as p:
+        browser = await p.chromium.launch(headless=True)
+        try:
+            page = await browser.new_page()
+            await page.goto(url, wait_until="domcontentloaded", timeout=15000)
+            # Collect every <a href> element on the page.
+            links = await page.evaluate(
+                """() => {
+                const anchors = document.querySelectorAll('a[href]');
+                return Array.from(anchors).map(a => ({
+                    text: (a.textContent || '').trim().slice(0, 200),
+                    href: a.getAttribute('href') || ''
+                })).filter(x => x.href);
+                }"""
+            )
+        finally:
+            await browser.close()
+
+    return links
+
+
+def _make_absolute(href: str, base_url: str) -> str:
+    """
+    Resolve *href* against *base_url* to an absolute URL.
+
+    urljoin handles root-relative ("/x"), protocol-relative ("//host/x")
+    and "../" references correctly (the previous string concatenation
+    heuristic did not). Empty hrefs and pure fragments yield "".
+    """
+    if not href or href.startswith("#"):
+        return ""
+    return urljoin(base_url, href)
+
+
+async def _ask_openrouter(api_key: str, links: list[dict[str, str]], domain: str) -> str | None:
+    """
+    Send the link list to OpenRouter and request the best publications URL.
+    Expects an answer of the form: {"url": "..."}
+
+    Raises httpx.HTTPStatusError for non-2xx API responses.
+    """
+    base_url = domain if domain.startswith("http") else f"https://{domain}"
+    # Ask for strict double-quoted JSON so the extraction regex below
+    # (which looks for "url" in double quotes) can match the reply.
+    prompt = (
+        "Analysiere diese Links einer Unternehmensberatung. "
+        "Welcher Link führt zur Seite mit Reports, Insights oder Fachartikeln? "
+        'Antworte NUR mit der absoluten URL im JSON-Format: {"url": "..."}'
+    )
+    links_text = "\n".join(
+        f"- {l.get('text', '')} -> {_make_absolute(l.get('href', ''), base_url)}"
+        for l in links[:80]  # cap the list to protect the token budget
+    )
+    user_content = f"Links von {domain}:\n{links_text}"
+
+    async with httpx.AsyncClient(timeout=30.0) as client:
+        resp = await client.post(
+            f"{OPENROUTER_BASE}/chat/completions",
+            headers={
+                "Authorization": f"Bearer {api_key}",
+                "Content-Type": "application/json",
+                "HTTP-Referer": base_url,
+            },
+            json={
+                "model": DEFAULT_MODEL,
+                "messages": [
+                    {"role": "system", "content": prompt},
+                    {"role": "user", "content": user_content},
+                ],
+                "max_tokens": 256,
+            },
+        )
+        resp.raise_for_status()
+        data = resp.json()
+    choice = (data.get("choices") or [None])[0]
+    if not choice:
+        return None
+    content = ((choice.get("message") or {}).get("content") or "").strip()
+    if not content:
+        return None
+
+    # Pull the JSON object out of the reply (it may be wrapped in Markdown).
+    json_match = re.search(r"\{[^{}]*\"url\"[^{}]*\}", content)
+    if json_match:
+        try:
+            obj = json.loads(json_match.group())
+            return (obj.get("url") or "").strip() or None
+        except json.JSONDecodeError:
+            pass
+    try:
+        obj = json.loads(content)
+        return (obj.get("url") or "").strip() or None
+    except json.JSONDecodeError:
+        return None
+
+
+async def get_publication_url(domain: str, *, api_key: str | None = None) -> dict[str, Any]:
+    """
+    Main entry point: scan the domain, send its links to OpenRouter and
+    return the discovered publications URL.
+
+    Returns a dict with keys "url" (str | None) and "error" (str | None).
+    """
+    key = api_key or os.getenv("OPENROUTER_API_KEY")
+    if not key:
+        return {"url": None, "error": "OPENROUTER_API_KEY nicht gesetzt"}
+
+    try:
+        links = await _fetch_links_with_playwright(domain)
+    except Exception as e:
+        return {"url": None, "error": f"Playwright/Scrape-Fehler: {e!s}"}
+
+    if not links:
+        return {"url": None, "error": "Keine Links auf der Seite gefunden"}
+
+    try:
+        url = await _ask_openrouter(key, links, domain)
+        return {"url": url, "error": None}
+    except Exception as e:
+        return {"url": None, "error": f"OpenRouter-Fehler: {e!s}"}