app/embed_server.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
d4e333325e
commit
3c0a406358
|
|
@ -1,4 +1,3 @@
|
|||
# FastAPI-Server für 384-d Embeddings (all-MiniLM-L6-v2)
|
||||
from __future__ import annotations
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
|
@ -7,7 +6,7 @@ from sentence_transformers import SentenceTransformer
|
|||
|
||||
app = FastAPI(title="mindnet-embed", version="1.0")
|
||||
|
||||
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2" # 384-d
|
||||
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2" # 384-dim
|
||||
_model: SentenceTransformer | None = None
|
||||
|
||||
class EmbedIn(BaseModel):
|
||||
|
|
@ -22,6 +21,10 @@ def _load_model():
|
|||
global _model
|
||||
_model = SentenceTransformer(MODEL_NAME)
|
||||
|
||||
@app.get("/health")
|
||||
def health():
|
||||
return {"ok": True, "model": MODEL_NAME, "dim": 384}
|
||||
|
||||
@app.post("/embed", response_model=EmbedOut)
|
||||
def embed(payload: EmbedIn) -> EmbedOut:
|
||||
if _model is None:
|
||||
|
|
@ -32,7 +35,3 @@ def embed(payload: EmbedIn) -> EmbedOut:
|
|||
if any(len(v) != 384 for v in vecs):
|
||||
raise HTTPException(status_code=500, detail="Embedding size mismatch (expected 384)")
|
||||
return EmbedOut(embeddings=vecs)
|
||||
|
||||
@app.get("/health")
|
||||
def health():
|
||||
return {"ok": True, "model": MODEL_NAME, "dim": 384}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user