Fix bench_one_batch producing unnatural results for expert parallel (#5149)
This commit is contained in:
@@ -207,7 +207,7 @@ def prepare_extend_inputs_for_correctness_test(
 
 
 def prepare_synthetic_inputs_for_latency_test(batch_size, input_len):
-    input_ids = np.ones((batch_size, input_len), dtype=np.int32)
+    input_ids = np.random.randint(0, 10000, (batch_size, input_len), dtype=np.int32)
     sampling_params = SamplingParams(
         temperature=0,
         max_new_tokens=BenchArgs.output_len,
Reference in New Issue
Block a user