Add moe topk softmax templated from vllm (#4302)

This commit is contained in:
Qingquan Song
2025-03-14 12:03:33 -07:00
committed by GitHub
parent 660305c38a
commit 61e4433caf
9 changed files with 716 additions and 6 deletions

View File

@@ -117,6 +117,11 @@ TORCH_LIBRARY_EXPAND(sgl_kernel, m) {
"experts_ids, Tensor! num_tokens_post_pad, Tensor! token_cnts_buffer, Tensor! cumsum_buffer) -> ()");
m.impl("moe_align_block_size", torch::kCUDA, &moe_align_block_size);
// Operator registration for topk_softmax.
// Schema: the first three tensors carry the `Tensor!` (mutable) annotation, so
// the op writes into them in place; `gating_output` is a plain input; the op
// returns nothing.
// NOTE(review): presumably the kernel takes a softmax over `gating_output` and
// writes the top-k weights/indices per token — confirm against the kernel source.
m.def(
    "topk_softmax("
    "Tensor! topk_weights, "
    "Tensor! topk_indices, "
    "Tensor! token_expert_indices, "
    "Tensor gating_output) -> ()");
// Bind the implementation for the CUDA dispatch key.
m.impl("topk_softmax", torch::kCUDA, &topk_softmax);
/*
* From csrc/speculative
*/