Fuse sorted_token_ids padding to moe_align_block_size kernel (#7437)

This commit is contained in:
Ke Bao
2025-06-25 08:44:27 +08:00
committed by GitHub
parent 112b496a6c
commit 57ab776910
7 changed files with 163 additions and 70 deletions

View File

@@ -59,7 +59,8 @@ TORCH_LIBRARY_EXPAND(sgl_kernel, m) {
*/
m.def(
"moe_align_block_size(Tensor topk_ids, int num_experts, int block_size, Tensor! sorted_token_ids, Tensor! "
"experts_ids, Tensor! num_tokens_post_pad, Tensor! token_cnts_buffer, Tensor! cumsum_buffer) -> ()");
"experts_ids, Tensor! num_tokens_post_pad, Tensor! token_cnts_buffer, Tensor! cumsum_buffer, bool "
"pad_sorted_token_ids) -> ()");
m.impl("moe_align_block_size", torch::kCUDA, &moe_align_block_size);
m.def(