[Fix] Fix major performance bug in certain cases (#1563)
Co-authored-by: hnyls2002 <hnyls2002@gmail.com>
This commit is contained in:
@@ -20,7 +20,22 @@ class TestBenchServing(unittest.TestCase):
|
||||
)
|
||||
|
||||
if is_in_ci():
|
||||
assert res["output_throughput"] > 2600
|
||||
assert res["output_throughput"] > 2830
|
||||
|
||||
def test_offline_throughput_non_stream_small_batch_size(self):
|
||||
res = run_bench_serving(
|
||||
model=DEFAULT_MODEL_NAME_FOR_TEST,
|
||||
num_prompts=200,
|
||||
request_rate=float("inf"),
|
||||
dataset_name="sharegpt",
|
||||
random_input_len=None,
|
||||
random_output_len=None,
|
||||
disable_stream=True,
|
||||
other_server_args=["--max-running-requests", "10"],
|
||||
)
|
||||
|
||||
if is_in_ci():
|
||||
assert res["output_throughput"] > 1000
|
||||
|
||||
def test_offline_throughput_without_radix_cache(self):
|
||||
res = run_bench_serving(
|
||||
@@ -31,7 +46,7 @@ class TestBenchServing(unittest.TestCase):
|
||||
)
|
||||
|
||||
if is_in_ci():
|
||||
assert res["output_throughput"] > 2800
|
||||
assert res["output_throughput"] > 2880
|
||||
|
||||
def test_offline_throughput_without_chunked_prefill(self):
|
||||
res = run_bench_serving(
|
||||
@@ -58,7 +73,7 @@ class TestBenchServing(unittest.TestCase):
|
||||
)
|
||||
|
||||
if is_in_ci():
|
||||
assert res["output_throughput"] > 2600
|
||||
assert res["output_throughput"] > 2930
|
||||
|
||||
def test_offline_throughput_default_fp8(self):
|
||||
res = run_bench_serving(
|
||||
|
||||
Reference in New Issue
Block a user