Fix bench_one_batch producing unnatural results for expert parallel (#5149)

This commit is contained in:
fzyzcjy
2025-04-20 15:38:04 +08:00
committed by GitHub
parent c555d794f7
commit d07e797ace

View File

@@ -207,7 +207,7 @@ def prepare_extend_inputs_for_correctness_test(
def prepare_synthetic_inputs_for_latency_test(batch_size, input_len):
-input_ids = np.ones((batch_size, input_len), dtype=np.int32)
+input_ids = np.random.randint(0, 10000, (batch_size, input_len), dtype=np.int32)
sampling_params = SamplingParams(
temperature=0,
max_new_tokens=BenchArgs.output_len,