From 92cb93f39078a3aa8f356ecd26e71bfa7a3dd962 Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Sat, 22 Jun 2024 15:11:04 +0800 Subject: [PATCH] Fix latency benchmark (#557) --- benchmark/latency_throughput/test_latency.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/benchmark/latency_throughput/test_latency.py b/benchmark/latency_throughput/test_latency.py index fc66b231e..af1dfcbe1 100644 --- a/benchmark/latency_throughput/test_latency.py +++ b/benchmark/latency_throughput/test_latency.py @@ -1,5 +1,4 @@ import argparse -import random import time import requests @@ -9,6 +8,7 @@ if __name__ == "__main__": parser.add_argument("--host", type=str, default="http://127.0.0.1") parser.add_argument("--port", type=int, default=None) parser.add_argument("--backend", type=str, default="srt") + parser.add_argument("--max-tokens", type=int, default=256) args = parser.parse_args() if args.port is None: @@ -25,7 +25,7 @@ if __name__ == "__main__": url = f"{args.host}:{args.port}" a = 20 - max_new_tokens = 256 + max_new_tokens = args.max_tokens prompt = f"{a, }" tic = time.time() @@ -37,6 +37,7 @@ if __name__ == "__main__": "sampling_params": { "temperature": 0, "max_new_tokens": max_new_tokens, + "ignore_eos": True, }, }, ) @@ -48,6 +49,7 @@ if __name__ == "__main__": "parameters": { "temperature": 0, "max_new_tokens": max_new_tokens, + "ignore_eos": True, }, }, ) @@ -58,6 +60,7 @@ if __name__ == "__main__": "prompt": prompt, "temperature": 0, "max_tokens": max_new_tokens, + "ignore_eos": True, }, ) elif args.backend == "ginfer":