From 18c9a78637419bdbe74bf2f08f94fba805956b1b Mon Sep 17 00:00:00 2001
From: Lars
Date: Wed, 3 Sep 2025 12:25:58 +0200
Subject: [PATCH] =?UTF-8?q?embed=5Fserver.py=20hinzugef=C3=BCgt?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review note: the file content below was revised after the original commit.
The commit id on the From line and the blob id on the index line still refer
to the original 38-line version.

 embed_server.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 embed_server.py

diff --git a/embed_server.py b/embed_server.py
new file mode 100644
index 0000000..ec9f6e9
--- /dev/null
+++ b/embed_server.py
@@ -0,0 +1,70 @@
+# FastAPI server for 384-d embeddings (all-MiniLM-L6-v2)
+from __future__ import annotations
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from typing import List, Optional
+from sentence_transformers import SentenceTransformer
+
+app = FastAPI(title="mindnet-embed", version="1.0")
+
+MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+# Single source of truth for the vector length that /embed enforces and
+# /health reports.
+EMBED_DIM = 384
+# Populated by the startup hook; stays None until the model has been loaded.
+_model: SentenceTransformer | None = None
+
+
+class EmbedIn(BaseModel):
+    """Request body for POST /embed."""
+
+    # Accepted in the request but never read by the handler; MODEL_NAME is
+    # always the model that gets used.
+    model: Optional[str] = None
+    # Texts to embed.
+    inputs: List[str]
+
+
+class EmbedOut(BaseModel):
+    """Response body for POST /embed."""
+
+    # Embedding vectors as lists of floats.
+    embeddings: List[List[float]]
+
+
+# NOTE(review): FastAPI documents on_event as deprecated in favour of lifespan
+# handlers; kept here so the server still runs on older FastAPI releases.
+@app.on_event("startup")
+def _load_model() -> None:
+    """Load the sentence-transformers model into the module-level slot."""
+    global _model
+    _model = SentenceTransformer(MODEL_NAME)
+
+
+@app.post("/embed", response_model=EmbedOut)
+def embed(payload: EmbedIn) -> EmbedOut:
+    """Embed every text in ``payload.inputs``.
+
+    Returns an empty list for empty input without calling the model.
+
+    Raises:
+        HTTPException: 503 if the model has not been loaded, 500 if any
+            returned vector does not have EMBED_DIM entries.
+    """
+    if _model is None:
+        raise HTTPException(status_code=503, detail="Model not loaded")
+    if not payload.inputs:
+        return EmbedOut(embeddings=[])
+    vecs = _model.encode(payload.inputs, normalize_embeddings=False).tolist()
+    if any(len(v) != EMBED_DIM for v in vecs):
+        raise HTTPException(
+            status_code=500,
+            detail=f"Embedding size mismatch (expected {EMBED_DIM})",
+        )
+    return EmbedOut(embeddings=vecs)
+
+
+@app.get("/health")
+def health():
+    """Report the configured model name and embedding dimension."""
+    return {"ok": True, "model": MODEL_NAME, "dim": EMBED_DIM}