"""
app/services/embeddings_client.py — Text→Embedding Service

Purpose: Unified client for embeddings via Ollama (Nomic). Ensures that both
the async path (ingestion) and the sync path (retriever) use the same vector
space (768 dimensions).

Version: 2.5.0 (Unified Ollama)
"""
from __future__ import annotations

import logging
import os
from typing import List

import httpx
import requests  # used by the synchronous fallback below

from app.config import get_settings

logger = logging.getLogger(__name__)


class EmbeddingsClient:
    """Async client that fetches text embeddings from an Ollama server."""

    def __init__(self) -> None:
        self.settings = get_settings()
        self.base_url = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
        self.model = os.getenv("MINDNET_EMBEDDING_MODEL")
        if not self.model:
            # No dedicated embedding model configured — reuse the chat model.
            self.model = os.getenv("MINDNET_LLM_MODEL", "phi3:mini")
            logger.warning(f"No MINDNET_EMBEDDING_MODEL set. Fallback to '{self.model}'.")

    async def embed_query(self, text: str) -> List[float]:
        """Embed a single query string; returns [] on blank input or failure."""
        return await self._request_embedding(text)

    async def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of documents, sharing one HTTP client across requests."""
        # Longer timeout for batches.
        async with httpx.AsyncClient(timeout=120.0) as client:
            return [
                await self._request_embedding_with_client(client, text)
                for text in texts
            ]

    async def _request_embedding(self, text: str) -> List[float]:
        """Embed one text using a short-lived client (30 s timeout)."""
        async with httpx.AsyncClient(timeout=30.0) as client:
            return await self._request_embedding_with_client(client, text)

    async def _request_embedding_with_client(
        self, client: httpx.AsyncClient, text: str
    ) -> List[float]:
        """POST to Ollama's /api/embeddings endpoint via *client*.

        Returns the embedding vector, or [] for blank input or on any
        request/parse error (best-effort: the error is logged, not raised).
        """
        if not text or not text.strip():
            return []
        url = f"{self.base_url}/api/embeddings"
        try:
            response = await client.post(url, json={"model": self.model, "prompt": text})
            response.raise_for_status()
            return response.json().get("embedding", [])
        except Exception as e:
            logger.error(f"Async embedding failed: {e}")
            return []


# ==============================================================================
# PART 2: SYNCHRONOUS FALLBACK (Unified)
# ==============================================================================
def embed_text(text: str) -> List[float]:
    """LEGACY/SYNC: embeds *text* via Ollama using 'requests'.

    Replaces SentenceTransformers so that the sync and async paths share one
    vector space (resolving the 768-vs-384 dimension conflict). Returns []
    for blank input or on any request failure (logged, never raised).
    """
    if not text or not text.strip():
        return []

    base_url = os.getenv("MINDNET_OLLAMA_URL", "http://127.0.0.1:11434")
    # Fallback logic identical to the EmbeddingsClient class.
    model = os.getenv("MINDNET_EMBEDDING_MODEL") or os.getenv("MINDNET_LLM_MODEL", "phi3:mini")

    payload = {"model": model, "prompt": text}
    try:
        # Blocking HTTP call to Ollama.
        resp = requests.post(f"{base_url}/api/embeddings", json=payload, timeout=30)
        resp.raise_for_status()
        return resp.json().get("embedding", [])
    except Exception as e:
        logger.error(f"Sync embedding (Ollama) failed: {e}")
        return []