anpassung an 786 vector
This commit is contained in:
parent
5fa02aed2d
commit
b1cf89982b
|
|
@ -1,15 +1,19 @@
|
||||||
"""
|
"""
|
||||||
app/services/embeddings_client.py
|
app/services/embeddings_client.py — Text→Embedding Service
|
||||||
Client für die Vektorisierung von Texten via Ollama API.
|
|
||||||
|
|
||||||
Version: 2.4.0 (Async + Dedicated Embedding Model Support)
|
Zweck:
|
||||||
|
Einheitlicher Client für Embeddings via Ollama (Nomic).
|
||||||
|
Stellt sicher, dass sowohl Async (Ingestion) als auch Sync (Retriever)
|
||||||
|
denselben Vektorraum (768 Dim) nutzen.
|
||||||
|
|
||||||
|
Version: 2.5.0 (Unified Ollama)
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
import httpx
|
import httpx
|
||||||
from typing import List, Optional
|
import requests # Für den synchronen Fallback
|
||||||
from functools import lru_cache
|
from typing import List
|
||||||
from app.config import get_settings
|
from app.config import get_settings
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -17,35 +21,22 @@ logger = logging.getLogger(__name__)
|
||||||
class EmbeddingsClient:
    """
    Async client for embeddings via the Ollama HTTP API.

    Configuration is read from environment variables:
      - MINDNET_OLLAMA_URL: base URL of the Ollama server
        (default: http://127.0.0.1:11434)
      - MINDNET_EMBEDDING_MODEL: dedicated embedding model (recommended)
      - MINDNET_LLM_MODEL: chat model used as fallback when no embedding
        model is configured (default: phi3:mini)

    All embedding methods return an empty list on blank input or on any
    request error, so batch callers must check for empty vectors themselves.
    """

    def __init__(self):
        self.settings = get_settings()
        self.base_url = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
        # Dedicated embedding model; falling back to the chat LLM keeps the
        # service running but degrades embedding quality.
        self.model = os.getenv("MINDNET_EMBEDDING_MODEL")
        if not self.model:
            self.model = os.getenv("MINDNET_LLM_MODEL", "phi3:mini")
            logger.warning(f"No MINDNET_EMBEDDING_MODEL set. Fallback to '{self.model}'.")

    async def embed_query(self, text: str) -> List[float]:
        """Create an embedding for a single text (e.g. a search query)."""
        return await self._request_embedding(text)

    async def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """
        Create embeddings for a list of texts (e.g. chunks during import).

        Reuses one httpx client for the whole batch for performance.
        """
        vectors = []
        # Longer timeout for batch processing
        async with httpx.AsyncClient(timeout=120.0) as client:
            for text in texts:
                vec = await self._request_embedding_with_client(client, text)
                # NOTE(review): this append was hidden diff context in the
                # reviewed source — assumed plain append; confirm upstream.
                vectors.append(vec)
        return vectors

    async def _request_embedding(self, text: str) -> List[float]:
        """Single request using its own short-lived client."""
        async with httpx.AsyncClient(timeout=30.0) as client:
            return await self._request_embedding_with_client(client, text)

    async def _request_embedding_with_client(self, client: httpx.AsyncClient, text: str) -> List[float]:
        """
        Perform the actual HTTP request against Ollama's /api/embeddings.

        Returns [] for blank input or on any error, so a batch run does not
        crash on a single failing chunk.
        """
        if not text or not text.strip():
            return []

        url = f"{self.base_url}/api/embeddings"
        try:
            response = await client.post(url, json={"model": self.model, "prompt": text})
            response.raise_for_status()
            return response.json().get("embedding", [])
        except Exception as e:
            # Swallow and log: the caller (e.g. IngestionService) must check
            # for empty vectors instead of the whole batch aborting.
            logger.error(f"Async embedding failed: {e}")
            return []
|
||||||
|
|
||||||
# ==============================================================================
# PART 2: SYNCHRONOUS FALLBACK (Unified)
# ==============================================================================
||||||
@lru_cache(maxsize=1)
|
|
||||||
def _cached_legacy_model():
|
|
||||||
from sentence_transformers import SentenceTransformer
|
|
||||||
s = get_settings()
|
|
||||||
# Hier nutzen wir das Modell aus den Settings, meist CPU-basiert
|
|
||||||
return SentenceTransformer(s.MODEL_NAME, device="cpu")
|
|
||||||
|
|
||||||
def embed_text(text: str) -> List[float]:
    """
    LEGACY/SYNC: synchronous embedding, now also via Ollama using 'requests'.

    Replaces the former SentenceTransformers path to resolve dimension
    conflicts (768 vs 384) between the async and sync embedding code.

    Returns [] for blank input or on any request error.
    """
    if not text or not text.strip():
        return []

    base_url = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
    model = os.getenv("MINDNET_EMBEDDING_MODEL")

    # Fallback logic identical to the EmbeddingsClient class
    if not model:
        model = os.getenv("MINDNET_LLM_MODEL", "phi3:mini")

    url = f"{base_url}/api/embeddings"

    try:
        # Synchronous (blocking) request
        response = requests.post(url, json={"model": model, "prompt": text}, timeout=30)
        response.raise_for_status()
        data = response.json()
        return data.get("embedding", [])
    except Exception as e:
        logger.error(f"Sync embedding (Ollama) failed: {e}")
        return []
|
||||||
Loading…
Reference in New Issue
Block a user