"""
|
|
FILE: app/services/embeddings_client.py
|
|
DESCRIPTION: Unified Embedding Client. Nutzt Ollama API (HTTP). Ersetzt lokale sentence-transformers.
|
|
VERSION: 2.5.0
|
|
STATUS: Active
|
|
DEPENDENCIES: httpx, requests, app.config
|
|
LAST_ANALYSIS: 2025-12-15
|
|
"""
from __future__ import annotations

import os
import logging
import httpx
import requests  # for the synchronous fallback
from typing import List

from app.config import get_settings

logger = logging.getLogger(__name__)

class EmbeddingsClient:
    """
    Async client for embeddings via Ollama.
    """

    def __init__(self):
        self.settings = get_settings()
        self.base_url = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
        self.model = os.getenv("MINDNET_EMBEDDING_MODEL")

        if not self.model:
            self.model = os.getenv("MINDNET_LLM_MODEL", "phi3:mini")
            logger.warning(f"No MINDNET_EMBEDDING_MODEL set. Falling back to '{self.model}'.")

    async def embed_query(self, text: str) -> List[float]:
        return await self._request_embedding(text)

    async def embed_documents(self, texts: List[str]) -> List[List[float]]:
        vectors = []
        # Longer timeout for batch requests
        async with httpx.AsyncClient(timeout=120.0) as client:
            for text in texts:
                vec = await self._request_embedding_with_client(client, text)
                vectors.append(vec)
        return vectors

    async def _request_embedding(self, text: str) -> List[float]:
        async with httpx.AsyncClient(timeout=30.0) as client:
            return await self._request_embedding_with_client(client, text)

    async def _request_embedding_with_client(self, client: httpx.AsyncClient, text: str) -> List[float]:
        if not text or not text.strip():
            return []
        url = f"{self.base_url}/api/embeddings"
        try:
            response = await client.post(url, json={"model": self.model, "prompt": text})
            response.raise_for_status()
            return response.json().get("embedding", [])
        except Exception as e:
            logger.error(f"Async embedding failed: {e}")
            return []
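
# Example usage (illustrative sketch, not part of the original module; assumes a
# reachable Ollama server at MINDNET_OLLAMA_URL with the configured model pulled):
#
#     import asyncio
#
#     async def main() -> None:
#         client = EmbeddingsClient()
#         query_vec = await client.embed_query("What is MindNet?")
#         doc_vecs = await client.embed_documents(["first doc", "second doc"])
#         print(len(query_vec), len(doc_vecs))  # dimension depends on the model
#
#     asyncio.run(main())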

# ==============================================================================
# PART 2: SYNCHRONOUS FALLBACK (Unified)
# ==============================================================================

def embed_text(text: str) -> List[float]:
    """
    LEGACY/SYNC: Now also uses Ollama, via 'requests'.
    Replaces SentenceTransformers to resolve dimension conflicts (768 vs. 384).
    """
    if not text or not text.strip():
        return []

    base_url = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
    model = os.getenv("MINDNET_EMBEDDING_MODEL")

    # Fallback logic identical to the class above
    if not model:
        model = os.getenv("MINDNET_LLM_MODEL", "phi3:mini")

    url = f"{base_url}/api/embeddings"

    try:
        # Synchronous request (blocking)
        response = requests.post(url, json={"model": model, "prompt": text}, timeout=30)
        response.raise_for_status()
        data = response.json()
        return data.get("embedding", [])
    except Exception as e:
        logger.error(f"Sync embedding (Ollama) failed: {e}")
        return []
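
# Minimal smoke test for the synchronous fallback (illustrative sketch; assumes a
# running Ollama instance; this entry point is not part of the original module):
if __name__ == "__main__":
    vector = embed_text("embedding smoke test")
    if vector:
        print(f"OK: received a {len(vector)}-dimensional embedding")
    else:
        print("FAILED: empty embedding (is Ollama running and the model pulled?)")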