"""FastAPI server exposing 384-dimensional sentence embeddings (all-MiniLM-L6-v2)."""
from __future__ import annotations

from typing import List, Optional

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer

app = FastAPI(title="mindnet-embed", version="1.0")

MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"  # 384-d
# Single source of truth for the vector length that /embed enforces and
# /health reports.
EMBED_DIM = 384

# Populated by the startup hook; stays None until that hook has run.
_model: SentenceTransformer | None = None


class EmbedIn(BaseModel):
    """Request body for ``POST /embed``."""

    # Accepted in the request, but the /embed handler never reads it: the
    # server always encodes with MODEL_NAME.
    model: Optional[str] = None
    # Texts to embed; one vector is returned per entry.
    inputs: List[str]


class EmbedOut(BaseModel):
    """Response body for ``POST /embed``."""

    # One list of floats per input text.
    embeddings: List[List[float]]


@app.on_event("startup")
def _load_model() -> None:
    """Instantiate the sentence-transformer once when the application starts."""
    global _model
    _model = SentenceTransformer(MODEL_NAME)


@app.post("/embed", response_model=EmbedOut)
def embed(payload: EmbedIn) -> EmbedOut:
    """Encode ``payload.inputs`` and return one embedding per input.

    Args:
        payload: Request body. Only ``payload.inputs`` is used; ``payload.model``
            is not consulted.

    Returns:
        ``EmbedOut`` holding the embeddings, or an empty list when
        ``payload.inputs`` is empty.

    Raises:
        HTTPException: 503 if the model has not been loaded; 500 if any
            returned vector does not have ``EMBED_DIM`` entries.
    """
    if _model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    if not payload.inputs:
        # Nothing to encode; skip the model call entirely.
        return EmbedOut(embeddings=[])
    vecs = _model.encode(payload.inputs, normalize_embeddings=False).tolist()
    if any(len(v) != EMBED_DIM for v in vecs):
        raise HTTPException(
            status_code=500,
            detail=f"Embedding size mismatch (expected {EMBED_DIM})",
        )
    return EmbedOut(embeddings=vecs)


@app.get("/health")
def health():
    """Return a fixed status payload with the configured model name and dimension.

    The payload is built from module constants only; this handler does not
    check whether the model has actually been loaded.
    """
    return {"ok": True, "model": MODEL_NAME, "dim": EMBED_DIM}