Sgl kernel fused_moe_gate support n_shared_experts (#5440)

This commit is contained in:
Xiaoyu Zhang
2025-04-18 14:05:15 +08:00
committed by GitHub
parent 53dcf38876
commit 8e09b37077
5 changed files with 140 additions and 38 deletions

View File

@@ -146,7 +146,8 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
m.impl("topk_softmax", torch::kCUDA, &topk_softmax);
m.def(
-"moe_fused_gate(Tensor input, Tensor bias, int num_expert_group, int topk_group, int topk) -> "
+"moe_fused_gate(Tensor input, Tensor bias, int num_expert_group, int topk_group, int topk, int "
+"n_share_experts_fusion, float routed_scaling_factor) -> "
"(Tensor[])");
m.impl("moe_fused_gate", torch::kCUDA, &moe_fused_gate);