Add fp4 quantize before all-gather for Flashinfer cutlass MoE DP (max throughput) (#7667)
This commit is contained in:
@@ -649,7 +649,7 @@ class ForwardBatch:
|
||||
num_tokens = global_num_tokens[0]
|
||||
|
||||
self.global_dp_buffer_len = buffer_len
|
||||
- set_dp_buffer_len(buffer_len, num_tokens)
+ set_dp_buffer_len(buffer_len, num_tokens, global_num_tokens)
|
||||
|
||||
bs = self.batch_size
|
||||
|
||||
|
||||
Reference in New Issue
Block a user