# mindnet-embed: a small FastAPI service that serves sentence embeddings from
# a single sentence-transformers model loaded once at application startup.
from __future__ import annotations

from typing import List, Optional

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer

app = FastAPI(title="mindnet-embed", version="1.0")

MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# Expected length of every vector produced by MODEL_NAME. Kept in one place so
# the /health payload, the size check and the error text cannot drift apart.
EMBED_DIM = 384

# Populated by the startup hook; stays None until the model has been loaded.
_model: SentenceTransformer | None = None


class EmbedIn(BaseModel):
    """Request body for POST /embed."""

    # Accepted in the request, but not read by the handler in this module:
    # the service always encodes with MODEL_NAME.
    model: Optional[str] = None
    # Texts to embed; one output vector is returned per entry.
    inputs: List[str]


class EmbedOut(BaseModel):
    """Response body for POST /embed."""

    # One vector per input text.
    embeddings: List[List[float]]


@app.on_event("startup")
def _load_model() -> None:
    """Instantiate the embedding model and store it in the module global."""
    global _model
    _model = SentenceTransformer(MODEL_NAME)


@app.get("/health")
def health():
    """Return a static status payload with the model name and vector width."""
    return {"ok": True, "model": MODEL_NAME, "dim": EMBED_DIM}


@app.post("/embed", response_model=EmbedOut)
def embed(payload: EmbedIn) -> EmbedOut:
    """Encode ``payload.inputs`` and return one embedding per input.

    Args:
        payload: Request body. Only ``payload.inputs`` is used here;
            ``payload.model`` is not consulted.

    Returns:
        An ``EmbedOut`` whose ``embeddings`` list holds the encoded vectors,
        or an empty list when ``payload.inputs`` is empty.

    Raises:
        HTTPException: 503 if the model has not been loaded yet; 500 if any
            returned vector does not have ``EMBED_DIM`` entries.
    """
    if _model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Short-circuit so the model is never invoked for an empty batch.
    if not payload.inputs:
        return EmbedOut(embeddings=[])

    vecs = _model.encode(payload.inputs, normalize_embeddings=False).tolist()

    # Guard against a model whose output width differs from what the service
    # advertises via /health.
    if any(len(v) != EMBED_DIM for v in vecs):
        raise HTTPException(
            status_code=500,
            detail=f"Embedding size mismatch (expected {EMBED_DIM})",
        )
    return EmbedOut(embeddings=vecs)