# Source: git patch eae09a84b31dd705ec6ae48659a53de867ac4232
# (Lars, Wed, 3 Sep 2025 12:29:42 +0200), which adds app/embed_server.py.
"""FastAPI server for 384-d embeddings (all-MiniLM-L6-v2).

Endpoints:
    POST /embed   -- encode a list of strings into embedding vectors.
    GET  /health  -- report the configured model name and embedding size.
"""
from __future__ import annotations

from typing import List, Optional

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer

app = FastAPI(title="mindnet-embed", version="1.0")

MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# Single source of truth for the vector length that /embed verifies and
# /health reports (previously the literal 384 was repeated in three places).
EMBED_DIM = 384

# Populated by the startup hook; stays None until the model has been loaded.
_model: SentenceTransformer | None = None


class EmbedIn(BaseModel):
    """Request body for POST /embed."""

    # Accepted in the payload but not read by the handler: the server always
    # encodes with MODEL_NAME, whatever value is sent here.
    model: Optional[str] = None
    # Texts to encode; one embedding is returned per entry.
    inputs: List[str]


class EmbedOut(BaseModel):
    """Response body for POST /embed: one vector per input string."""

    embeddings: List[List[float]]


# NOTE(review): recent FastAPI releases deprecate ``on_event`` in favour of
# lifespan handlers -- consider migrating once the minimum FastAPI version
# used by this project is confirmed.
@app.on_event("startup")
def _load_model() -> None:
    """Instantiate the SentenceTransformer once, when the app starts."""
    global _model
    _model = SentenceTransformer(MODEL_NAME)


@app.post("/embed", response_model=EmbedOut)
def embed(payload: EmbedIn) -> EmbedOut:
    """Encode ``payload.inputs`` and return the embeddings.

    Raises:
        HTTPException: 503 if the model has not been loaded; 500 if any
            returned vector does not have ``EMBED_DIM`` components.
    """
    if _model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    if not payload.inputs:
        # Nothing to encode; skip the model call entirely.
        return EmbedOut(embeddings=[])
    vecs = _model.encode(payload.inputs, normalize_embeddings=False).tolist()
    if any(len(v) != EMBED_DIM for v in vecs):
        raise HTTPException(
            status_code=500,
            detail=f"Embedding size mismatch (expected {EMBED_DIM})",
        )
    return EmbedOut(embeddings=vecs)


@app.get("/health")
def health():
    """Return the service status, configured model name and embedding size.

    ``ok`` is true only when the model is loaded, so this endpoint agrees
    with the 503 that /embed returns while the model is unavailable.
    """
    return {"ok": _model is not None, "model": MODEL_NAME, "dim": EMBED_DIM}