app/services/llm_ollama.py hinzugefügt

2025-10-07 11:37:40 +02:00 · 2025-10-07 11:37:40 +02:00 · f85aa8f604
commit f85aa8f604
parent e5ca3c4177
1 changed files with 88 additions and 0 deletions
--- a/app/services/llm_ollama.py
+++ b/app/services/llm_ollama.py
@ -0,0 +1,88 @@
 """
 app/services/llm_ollama.py — Ollama-Integration & Prompt-Bau (WP-04)
 Zweck:
    Prompt-Template & (optionaler) lokaler Aufruf von Ollama. Der Aufruf ist
    bewusst gekapselt und kann gefahrlos deaktiviert bleiben, bis ihr ein
    konkretes Modell konfigurieren wollt.
 Kompatibilität:
    Python 3.12+
 Version:
    0.1.0  (Erstanlage)
 Stand:
    2025-10-07
 Bezug:
    WP-04/05 Kontextbereitstellung für LLM
 Nutzung:
    from app.services.llm_ollama import build_prompt, call_ollama
 Änderungsverlauf:
    0.1.0 (2025-10-07) – Erstanlage.
 """
 from __future__ import annotations
 from typing import List, Dict, Optional
 import subprocess
 import json
 # Prompt skeleton rendered with str.format(); build_prompt() supplies the
 # two placeholders {question} and {contexts}.
 PROMPT_TEMPLATE = """System: You are a helpful expert.
 User: {question}
 Context (ranked):
 {contexts}
 Task: Answer precisely. At the end, list sources (note title + section) and important edge paths.
 """
 def build_context_block(items: List[Dict]) -> str:
    """Format the top-k context chunks as a numbered block for the prompt.

    Every item becomes ``"<rank>) <note> — <section> [score=<s>]"`` followed by
    the chunk text on the next line and a trailing newline.

    Args:
        items: Ranked context dicts. Keys read: ``note_title`` (fallback
            ``note_id``), ``section`` (fallback ``section_title``), ``score``
            and ``text`` (fallback ``body``). ``None`` is treated as empty.

    Returns:
        The rendered entries joined by newlines; ``""`` when there are none.
    """
    entries = []
    for rank, item in enumerate(items or [], 1):
        note = item.get("note_title", "") or item.get("note_id", "")
        section = item.get("section", "") or item.get("section_title", "")
        text = item.get("text", "") or item.get("body", "") or ""
        # A missing, None or non-numeric score must not abort prompt building,
        # so it is coerced to a float and falls back to 0.0.
        try:
            score = float(item.get("score") or 0)
        except (TypeError, ValueError):
            score = 0.0
        entries.append(f"{rank}) {note} — {section} [score={score:.2f}]\n{text}\n")
    return "\n".join(entries)
 def build_prompt(question: str, contexts: List[Dict]) -> str:
    """Render the prompt template with the question and the formatted contexts.

    The contexts are first turned into a text block by ``build_context_block``
    and then substituted, together with the question, into ``PROMPT_TEMPLATE``.
    """
    context_block = build_context_block(contexts)
    return PROMPT_TEMPLATE.format(contexts=context_block, question=question)
 def _extract_response_text(raw: str) -> str:
    """Return the answer text contained in raw ``ollama`` stdout.

    Output that consists solely of JSON objects (one per line) and carries at
    least one string ``response`` field is treated as a streamed JSONL reply:
    the ``response`` fragments are concatenated. Anything else is plain text
    and is returned as-is (stripped), so its line breaks survive.
    """
    chunks = []
    for line in raw.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except ValueError:
            # Not JSON at all -> the whole output is plain text.
            return raw.strip()
        if not isinstance(obj, dict):
            # Valid JSON but not an object (e.g. a quoted sentence or a
            # number): this is ordinary text, not a JSONL stream.
            return raw.strip()
        piece = obj.get("response")
        if isinstance(piece, str):
            chunks.append(piece)
    if not chunks:
        return raw.strip()
    return "".join(chunks).strip()


 def call_ollama(prompt: str, model: str = "llama3.1:8b", timeout_s: int = 120) -> Optional[str]:
    """Run ``ollama run <model>`` locally and return the generated text.

    The prompt is sent on stdin as UTF-8; stdout is decoded as UTF-8 with
    undecodable bytes replaced.

    Args:
        prompt: Full prompt text.
        model: Name of the model passed to ``ollama run``.
        timeout_s: Maximum run time of the subprocess in seconds.

    Returns:
        The generated text, or ``None`` when the ``ollama`` executable cannot
        be started, the call times out, or the process exits with a non-zero
        status.

    Note:
        Only use this when Ollama is installed and configured locally.
    """
    try:
        proc = subprocess.run(
            ["ollama", "run", model],
            input=prompt.encode("utf-8"),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout_s,
            check=False,
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # Executable missing or not runnable, timeout, or arguments that
        # cannot be encoded / passed to the OS.
        return None
    if proc.returncode != 0:
        # A failed run (e.g. unknown model) is an error, not an empty answer.
        return None
    return _extract_response_text(proc.stdout.decode("utf-8", errors="replace"))