Fuse sorted_token_ids padding to moe_align_block_size kernel (#7437)
This commit is contained in:
@@ -59,7 +59,8 @@ TORCH_LIBRARY_EXPAND(sgl_kernel, m) {
 */
 m.def(
 "moe_align_block_size(Tensor topk_ids, int num_experts, int block_size, Tensor! sorted_token_ids, Tensor! "
-"experts_ids, Tensor! num_tokens_post_pad, Tensor! token_cnts_buffer, Tensor! cumsum_buffer) -> ()");
+"experts_ids, Tensor! num_tokens_post_pad, Tensor! token_cnts_buffer, Tensor! cumsum_buffer, bool "
+"pad_sorted_token_ids) -> ()");
 m.impl("moe_align_block_size", torch::kCUDA, &moe_align_block_size);
 
 m.def(
||||
Reference in New Issue
Block a user