Generate 1 token to verify the health of the inference service in /health (#1154)

Co-authored-by: Yineng Zhang <me@zhyncs.com>
This commit is contained in:
Lucien
2024-08-21 01:14:34 +08:00
committed by GitHub
parent 04707b09b7
commit 6242c399ab

View File

@@ -89,6 +89,23 @@ app = FastAPI()
# Module-level handle whose generate_request() is called by the /v1/health
# endpoint. It starts as None here; presumably it is assigned during server
# startup before requests are served (TODO confirm against the launch code).
tokenizer_manager = None
@app.get("/v1/health")
async def health(request: Request) -> Response:
    """Probe the inference service by requesting one generated token.

    A one-token generation request is submitted through the tokenizer
    manager. The endpoint answers HTTP 200 as soon as the stream produces
    its first item (or finishes without raising), and HTTP 503 when any
    exception is raised while generating; the exception is logged first.
    """
    sampling_params = {"max_new_tokens": 1, "temperature": 0.7}
    probe = GenerateReqInput(text="s", sampling_params=sampling_params)

    status = 200
    try:
        stream = tokenizer_manager.generate_request(probe, request)
        async for _ in stream:
            # One item is enough to show that generation works end to end.
            break
    except Exception as exc:
        logger.exception(exc)
        status = 503
    return Response(status_code=status)
@app.get("/health")
async def health() -> Response:
"""Health check."""