Fix bench_one_batch producing unnatural results for expert parallel (#5149)
This commit is contained in:
@@ -207,7 +207,7 @@ def prepare_extend_inputs_for_correctness_test(
|
|||||||
|
|
||||||
|
|
||||||
def prepare_synthetic_inputs_for_latency_test(batch_size, input_len):
|
def prepare_synthetic_inputs_for_latency_test(batch_size, input_len):
|
||||||
input_ids = np.ones((batch_size, input_len), dtype=np.int32)
|
input_ids = np.random.randint(0, 10000, (batch_size, input_len), dtype=np.int32)
|
||||||
sampling_params = SamplingParams(
|
sampling_params = SamplingParams(
|
||||||
temperature=0,
|
temperature=0,
|
||||||
max_new_tokens=BenchArgs.output_len,
|
max_new_tokens=BenchArgs.output_len,
|
||||||
|
|||||||
Reference in New Issue
Block a user