adaptation to 768-dim vectors

Lars 2025-12-11 14:28:46 +01:00
parent 5fa02aed2d
commit b1cf89982b


@@ -1,15 +1,19 @@
 """
-app/services/embeddings_client.py
-Client for vectorizing texts via the Ollama API.
-Version: 2.4.0 (Async + Dedicated Embedding Model Support)
+app/services/embeddings_client.py TextEmbedding Service
+
+Purpose:
+    Unified client for embeddings via Ollama (Nomic).
+    Ensures that both async (ingestion) and sync (retriever) callers
+    use the same vector space (768 dims).
+Version: 2.5.0 (Unified Ollama)
 """
 from __future__ import annotations
 import os
 import logging
 import httpx
-from typing import List, Optional
-from functools import lru_cache
+import requests  # for the synchronous fallback
+from typing import List
 from app.config import get_settings

 logger = logging.getLogger(__name__)
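
Editor's note: the 768-dim figure comes from the embedding model, not from this client, so it is worth guarding at startup. A minimal sketch of such a guard, assuming a Nomic-style model (e.g. nomic-embed-text) that returns 768-dim vectors; the constant and function here are hypothetical, not part of this commit:

EXPECTED_DIM = 768  # assumption: nomic-embed-text yields 768-dim vectors

def assert_dimension(vector: list[float]) -> None:
    # Fail fast if the configured model does not match the vector space
    # the store (e.g. a pgvector/Qdrant collection) was created with.
    if len(vector) != EXPECTED_DIM:
        raise ValueError(f"expected {EXPECTED_DIM} dims, got {len(vector)}")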
@@ -17,35 +21,22 @@ logger = logging.getLogger(__name__)

 class EmbeddingsClient:
     """
     Async client for embeddings via Ollama.
-    Separates the chat model (generation) from the embedding model (semantics).
     """
     def __init__(self):
         self.settings = get_settings()
         self.base_url = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
-        # read the configuration for the dedicated embedding model
         self.model = os.getenv("MINDNET_EMBEDDING_MODEL")
-        # fall back to the LLM if no embedding model is set (not recommended for prod)
         if not self.model:
             self.model = os.getenv("MINDNET_LLM_MODEL", "phi3:mini")
-            logger.warning(f"No MINDNET_EMBEDDING_MODEL set. Falling back to LLM '{self.model}'. Quality might suffer.")
-        else:
-            logger.info(f"EmbeddingsClient initialized with model: {self.model}")
+            logger.warning(f"No MINDNET_EMBEDDING_MODEL set. Fallback to '{self.model}'.")

     async def embed_query(self, text: str) -> List[float]:
-        """
-        Generates an embedding for a single text (e.g. a search query).
-        """
         return await self._request_embedding(text)

     async def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        """
-        Generates embeddings for a list of texts (e.g. chunks during import).
-        Uses a persistent session for performance.
-        """
         vectors = []
-        # increased timeout for batch processing
+        # longer timeout for batches
         async with httpx.AsyncClient(timeout=120.0) as client:
             for text in texts:
                 vec = await self._request_embedding_with_client(client, text)
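
Editor's note: as the next hunk shows, a failed request yields an empty list rather than an exception, so callers of embed_documents must drop empty vectors themselves before upserting. A caller-side sketch under that assumption; embed_and_filter is a hypothetical helper, not part of this commit:

from typing import List, Tuple
from app.services.embeddings_client import EmbeddingsClient

async def embed_and_filter(texts: List[str]) -> List[Tuple[str, List[float]]]:
    # Pair each chunk with its vector and skip chunks whose embedding failed.
    client = EmbeddingsClient()
    vectors = await client.embed_documents(texts)
    return [(text, vec) for text, vec in zip(texts, vectors) if vec]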
@@ -53,59 +44,47 @@ class EmbeddingsClient:
         return vectors

     async def _request_embedding(self, text: str) -> List[float]:
-        """Internal helper for a single request."""
         async with httpx.AsyncClient(timeout=30.0) as client:
             return await self._request_embedding_with_client(client, text)

     async def _request_embedding_with_client(self, client: httpx.AsyncClient, text: str) -> List[float]:
-        """
-        Performs the actual HTTP request against Ollama.
-        """
-        if not text or not text.strip():
-            return []
+        if not text or not text.strip(): return []
         url = f"{self.base_url}/api/embeddings"
         try:
-            response = await client.post(
-                url,
-                json={
-                    "model": self.model,
-                    "prompt": text
-                }
-            )
-            if response.status_code == 404:
-                logger.error(f"Model '{self.model}' not found in Ollama. Run: ollama pull {self.model}")
-                return []
+            response = await client.post(url, json={"model": self.model, "prompt": text})
             response.raise_for_status()
-            data = response.json()
-            return data.get("embedding", [])
+            return response.json().get("embedding", [])
         except Exception as e:
-            logger.error(f"Embedding error (Model: {self.model}): {e}")
-            # Return an empty list so the batch process does not crash completely.
-            # The caller (IngestionService) must check whether the vector is empty.
+            logger.error(f"Async embedding failed: {e}")
             return []

-# --- LEGACY SUPPORT (synchronous) ---
-# Only used by old scripts or tests without async support.
+# ==============================================================================
+# PART 2: SYNCHRONOUS FALLBACK (Unified)
+# ==============================================================================

-@lru_cache(maxsize=1)
-def _cached_legacy_model():
-    from sentence_transformers import SentenceTransformer
-    s = get_settings()
-    # use the model from the settings, usually CPU-based
-    return SentenceTransformer(s.MODEL_NAME, device="cpu")
-
 def embed_text(text: str) -> List[float]:
     """
-    LEGACY: synchronous embedding via SentenceTransformers (CPU).
+    LEGACY/SYNC: now also uses Ollama via 'requests'.
+    Replaces SentenceTransformers to resolve dimension conflicts (768 vs 384).
     """
     if not text or not text.strip():
         return []
+    base_url = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
+    model = os.getenv("MINDNET_EMBEDDING_MODEL")
+    # fallback logic identical to the class
+    if not model:
+        model = os.getenv("MINDNET_LLM_MODEL", "phi3:mini")
+    url = f"{base_url}/api/embeddings"
     try:
-        return _cached_legacy_model().encode([text], normalize_embeddings=True)[0].tolist()
+        # synchronous request (blocking)
+        response = requests.post(url, json={"model": model, "prompt": text}, timeout=30)
+        response.raise_for_status()
+        data = response.json()
+        return data.get("embedding", [])
     except Exception as e:
-        logger.error(f"Legacy embed_text failed: {e}")
+        logger.error(f"Sync embedding (Ollama) failed: {e}")
         return []
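
Editor's note: since the point of the change is that ingestion (async) and retrieval (sync) now share one vector space, a quick smoke test is to compare the output dimensions of the two paths. A minimal sketch, assuming a running Ollama instance with the configured embedding model already pulled:

import asyncio
from app.services.embeddings_client import EmbeddingsClient, embed_text

async def main() -> None:
    async_vec = await EmbeddingsClient().embed_query("hello world")
    sync_vec = embed_text("hello world")
    # Both paths should agree; with nomic-embed-text this is 768 (assumption).
    assert len(async_vec) == len(sync_vec), "vector spaces diverge"
    print(f"dims: async={len(async_vec)}, sync={len(sync_vec)}")

if __name__ == "__main__":
    asyncio.run(main())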