Support n_shared_experts in the sgl-kernel fused_moe_gate op (#5440)

2025-04-18 14:05:15 +08:00
parent 53dcf38876
commit 8e09b37077
5 changed files with 140 additions and 38 deletions
--- a/sgl-kernel/csrc/common_extension.cc
+++ b/sgl-kernel/csrc/common_extension.cc
@@ -146,7 +146,8 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
  m.impl("topk_softmax", torch::kCUDA, &topk_softmax);

  m.def(
-      "moe_fused_gate(Tensor input, Tensor bias, int num_expert_group, int topk_group, int topk) -> "
+      "moe_fused_gate(Tensor input, Tensor bias, int num_expert_group, int topk_group, int topk, int "
+      "n_share_experts_fusion, float routed_scaling_factor) -> "
      "(Tensor[])");
  m.impl("moe_fused_gate", torch::kCUDA, &moe_fused_gate);