# mindnet/app/services/llm_ollama.py

"""
app/services/llm_ollama.py — Ollama-Integration & Prompt-Bau (WP-04)

Zweck:
    Prompt-Template & (optionaler) lokaler Aufruf von Ollama. Der Aufruf ist
    bewusst gekapselt und kann gefahrlos deaktiviert bleiben, bis ihr ein
    konkretes Modell konfigurieren wollt.
Kompatibilität:
    Python 3.12+
Version:
    0.1.0  (Erstanlage)
Stand:
    2025-10-07
Bezug:
    WP-04/05 Kontextbereitstellung für LLM
Nutzung:
    from app.services.llm_ollama import build_prompt, call_ollama
Änderungsverlauf:
    0.1.0 (2025-10-07) – Erstanlage.
"""

from __future__ import annotations
from typing import List, Dict, Optional
import subprocess
import json

# Prompt skeleton rendered with ``str.format``. It has exactly two
# placeholders: ``{question}`` (the user question) and ``{contexts}`` (the
# pre-formatted, ranked context block). Any additional literal braces added
# to this text must be doubled (``{{`` / ``}}``) so ``format`` does not treat
# them as placeholders.
PROMPT_TEMPLATE = """System: You are a helpful expert.
User: {question}

Context (ranked):
{contexts}

Task: Answer precisely. At the end, list sources (note title + section) and important edge paths.
"""


def build_context_block(items: List[Dict]) -> str:
    """Format the top-K context items (chunks) as a numbered text block.

    Each item is rendered as a header line ``"<rank>) <note> — <section>
    [score=<x.xx>]"`` followed by the chunk text; entries are separated by a
    blank line.

    Args:
        items: Context dicts. Keys read per item:
            ``note_title`` (fallback ``note_id``), ``section`` (fallback
            ``section_title``), ``score`` and ``text`` (fallback ``body``).
            Missing or falsy values render as an empty string. ``None`` is
            treated like an empty list.

    Returns:
        The formatted block, or ``""`` when there are no items.
    """
    lines = []
    for rank, item in enumerate(items or [], 1):
        # ``or ""`` at the end keeps a None fallback value from being
        # rendered as the literal string "None".
        note = item.get("note_title") or item.get("note_id") or ""
        section = item.get("section") or item.get("section_title") or ""
        text = item.get("text") or item.get("body") or ""
        # The score may be absent, None or a numeric string depending on the
        # producer; never let a malformed score break prompt construction.
        try:
            score = float(item.get("score", 0))
        except (TypeError, ValueError):
            score = 0.0
        lines.append(f"{rank}) {note} — {section} [score={score:.2f}]\n{text}\n")
    return "\n".join(lines)


def build_prompt(question: str, contexts: List[Dict]) -> str:
    """Render the full LLM prompt from a question and its context items.

    Args:
        question: The user question, inserted at ``{question}``.
        contexts: Context dicts; they are formatted with
            ``build_context_block`` and inserted at ``{contexts}``.

    Returns:
        ``PROMPT_TEMPLATE`` with both placeholders filled in.
    """
    context_block = build_context_block(contexts)
    rendered = PROMPT_TEMPLATE.format(question=question, contexts=context_block)
    return rendered


def _extract_response_text(raw: str) -> str:
    """Turn raw ``ollama run`` stdout into the answer text.

    If at least one line is a JSON object with a string ``"response"`` field,
    the output is treated as a JSON-lines stream: the ``response`` fragments
    are concatenated, JSON objects without ``response`` are skipped, and any
    other line is kept verbatim. Otherwise the output is plain text and is
    returned unchanged apart from stripping, so line breaks are preserved.
    """
    pieces: List[str] = []
    saw_response = False
    for line in raw.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            pieces.append(line)
            continue
        if not isinstance(obj, dict):
            # A bare JSON scalar/list is ordinary answer text, not a chunk.
            pieces.append(line)
        elif isinstance(obj.get("response"), str):
            pieces.append(obj["response"])
            saw_response = True
        # Remaining case: JSON object without "response" -> skipped.
    if not saw_response:
        # Plain-text output: joining lines with "" would destroy the
        # answer's line structure, so hand back the original text.
        return raw.strip()
    return "".join(pieces).strip()


def call_ollama(prompt: str, model: str = "llama3.1:8b", timeout_s: int = 120) -> Optional[str]:
    """Run ``ollama run <model>`` locally and return the generated text.

    The prompt is passed to the process on stdin (UTF-8). Only use this when
    Ollama is installed and configured on the local machine.

    Args:
        prompt: Complete prompt text.
        model: Model name handed to ``ollama run``.
        timeout_s: Maximum run time in seconds before the call is aborted.

    Returns:
        The generated text (possibly ``""`` when the process printed
        nothing), or ``None`` when the process could not be started, timed
        out, or exited with a non-zero status.
    """
    try:
        proc = subprocess.run(
            ["ollama", "run", model],
            input=prompt.encode("utf-8"),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout_s,
            check=False,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # Binary missing / not executable, invalid arguments, or timeout:
        # this call is best-effort by contract, so report "no answer".
        return None

    if proc.returncode != 0:
        # A failed run is an error, not an (empty) answer.
        return None

    out = proc.stdout.decode("utf-8", errors="replace")
    # NOTE(review): per the original author some ollama builds stream JSON
    # lines instead of plain text; both shapes are handled by the helper.
    return _extract_response_text(out)