diff --git a/app/services/llm_ollama.py b/app/services/llm_ollama.py
new file mode 100644
index 0000000..ae5fc2f
--- /dev/null
+++ b/app/services/llm_ollama.py
@@ -0,0 +1,88 @@
+"""
+app/services/llm_ollama.py — Ollama-Integration & Prompt-Bau (WP-04)
+
+Zweck:
+    Prompt-Template & (optionaler) lokaler Aufruf von Ollama. Der Aufruf ist
+    bewusst gekapselt und kann gefahrlos deaktiviert bleiben, bis ihr ein
+    konkretes Modell konfigurieren wollt.
+Kompatibilität:
+    Python 3.12+
+Version:
+    0.1.0  (Erstanlage)
+Stand:
+    2025-10-07
+Bezug:
+    WP-04/05 Kontextbereitstellung für LLM
+Nutzung:
+    from app.services.llm_ollama import build_prompt, call_ollama
+Änderungsverlauf:
+    0.1.0 (2025-10-07) – Erstanlage.
+"""
+
+from __future__ import annotations
+from typing import List, Dict, Optional
+import subprocess
+import json
+
+PROMPT_TEMPLATE = """System: You are a helpful expert.
+User: {question}
+
+Context (ranked):
+{contexts}
+
+Task: Answer precisely. At the end, list sources (note title + section) and important edge paths.
+"""  # str.format template; build_prompt() fills {question} and {contexts}.
+
+
+def build_context_block(items: List[Dict]) -> str:
+    """Format ranked top-K chunks as numbered prompt entries; None fields fall back to empty text / score 0."""
+    entries = []
+    for rank, item in enumerate(items or [], 1):
+        title = item.get("note_title") or item.get("note_id") or ""
+        section = item.get("section") or item.get("section_title") or ""
+        score = item.get("score") or 0  # a stored None must not reach the :.2f format
+        body = item.get("text") or item.get("body") or ""
+        entries.append(f"{rank}) {title} — {section} [score={score:.2f}]\n{body}\n")
+    return "\n".join(entries)
+
+
+def build_prompt(question: str, contexts: List[Dict]) -> str:
+    """Fill PROMPT_TEMPLATE with the question and the formatted context block."""
+    return PROMPT_TEMPLATE.format(contexts=build_context_block(contexts), question=question)
+
+
+def call_ollama(prompt: str, model: str = "llama3.1:8b", timeout_s: int = 120) -> Optional[str]:
+    """Run ``ollama run <model>`` locally with ``prompt`` on stdin and return its text.
+
+    If every non-blank output line is a JSON object, the output is treated as
+    a JSONL stream and its string ``response`` fields are concatenated. Any
+    other output (or a stream without such fields) is returned as plain text.
+
+    Returns:
+        The stripped text, or None if the process cannot be started, times
+        out, or exits with a non-zero status.
+    """
+    try:
+        proc = subprocess.run(
+            ["ollama", "run", model],
+            input=prompt.encode("utf-8"),
+            capture_output=True,
+            timeout=timeout_s,
+            check=False,
+        )
+    except (OSError, subprocess.SubprocessError, ValueError):
+        return None  # binary missing, timed out, or invalid/un-encodable arguments
+    if proc.returncode != 0:
+        return None  # stdout of a failed run is not a usable answer
+    out = proc.stdout.decode("utf-8", errors="replace")
+    fragments = []
+    for line in filter(None, map(str.strip, out.splitlines())):
+        try:
+            obj = json.loads(line)
+        except ValueError:
+            obj = None
+        if not isinstance(obj, dict):
+            return out.strip()  # plain text: keep the original line breaks
+        if isinstance(obj.get("response"), str):
+            fragments.append(obj["response"])
+    return "".join(fragments).strip() if fragments else out.strip()