[Minor, Performance] Use torch.argmax for greedy sampling (#1589)

This commit is contained in:
Ying Sheng
2024-10-06 13:15:05 -07:00
committed by GitHub
parent 9c064bf78a
commit c98e84c21e
3 changed files with 34 additions and 2 deletions

View File

@@ -27,11 +27,11 @@ class TestBenchServing(unittest.TestCase):
model=DEFAULT_MODEL_NAME_FOR_TEST,
num_prompts=200,
request_rate=float("inf"),
other_server_args=["--max-running-requests", "10"],
dataset_name="sharegpt",
random_input_len=None,
random_output_len=None,
disable_stream=True,
other_server_args=["--max-running-requests", "10"],
)
if is_in_ci():