app/embed_server.py hinzugefügt
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
cf4a0be177
commit
eae09a84b3
38
app/embed_server.py
Normal file
38
app/embed_server.py
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
# FastAPI-Server für 384-d Embeddings (all-MiniLM-L6-v2)
|
||||||
|
from __future__ import annotations
|
||||||
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from typing import List, Optional
|
||||||
|
from sentence_transformers import SentenceTransformer
|
||||||
|
|
||||||
|
# Checkpoint identifier handed to SentenceTransformer; this model emits 384-d vectors.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# ASGI application object; the route handlers in this module register on it.
app = FastAPI(title="mindnet-embed", version="1.0")

# Process-wide model handle. It starts out as None and is assigned by the
# startup hook (_load_model), so request handlers must check it before use.
_model: Optional[SentenceTransformer] = None
|
||||||
|
|
||||||
|
class EmbedIn(BaseModel):
    """Request body for ``POST /embed``."""

    # Optional model name sent by the client; defaults to None when omitted.
    model: Optional[str] = None

    # Texts to embed (required).
    inputs: List[str]
|
||||||
|
|
||||||
|
class EmbedOut(BaseModel):
    """Response body for ``POST /embed``."""

    # List of embedding vectors, each a list of floats.
    embeddings: List[List[float]]
|
||||||
|
|
||||||
|
@app.on_event("startup")
def _load_model():
    """Create the embedding model on application startup.

    Builds ``SentenceTransformer(MODEL_NAME)`` and stores it in the
    module-level ``_model`` variable.
    """
    global _model
    loaded = SentenceTransformer(MODEL_NAME)
    _model = loaded
|
||||||
|
|
||||||
|
@app.post("/embed", response_model=EmbedOut)
def embed(payload: EmbedIn) -> EmbedOut:
    """Encode ``payload.inputs`` with the loaded model and return the vectors.

    Only ``payload.inputs`` is read; ``payload.model`` is not consulted here.

    Args:
        payload: Parsed request body.

    Returns:
        ``EmbedOut`` wrapping the list produced by ``encode(...).tolist()``,
        or an empty list when ``payload.inputs`` is empty.

    Raises:
        HTTPException: 503 if the module-level model is still ``None``;
            500 if any returned vector does not have length 384.
    """
    encoder = _model
    if encoder is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    texts = payload.inputs
    if not texts:
        # Nothing to encode: answer with an empty result without calling the model.
        return EmbedOut(embeddings=[])

    encoded = encoder.encode(texts, normalize_embeddings=False)
    vectors = encoded.tolist()

    # Refuse to return vectors whose length differs from the expected 384.
    if not all(len(vec) == 384 for vec in vectors):
        raise HTTPException(status_code=500, detail="Embedding size mismatch (expected 384)")

    return EmbedOut(embeddings=vectors)
|
||||||
|
|
||||||
|
@app.get("/health")
def health():
    """Return a static status payload: ``ok`` flag, model name, and dimension (384)."""
    return dict(ok=True, model=MODEL_NAME, dim=384)
|
||||||
Loading…
Reference in New Issue
Block a user