[1/2][resubmit again] sgl-kernel: Fuse routed scaling factor into moe_fused_gate (#9088)

2025-08-12 20:12:38 -07:00
parent 8723b4f146
commit 13c48dcf88
4 changed files with 32 additions and 11 deletions
--- a/sgl-kernel/csrc/common_extension.cc
+++ b/sgl-kernel/csrc/common_extension.cc
@@ -175,7 +175,7 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {

  m.def(
      "moe_fused_gate(Tensor input, Tensor bias, int num_expert_group, int topk_group, int topk, int "
-      "num_fused_shared_experts, float routed_scaling_factor) -> "
+      "num_fused_shared_experts, float routed_scaling_factor, bool apply_routed_scaling_factor_on_output) -> "
      "(Tensor[])");
  m.impl("moe_fused_gate", torch::kCUDA, &moe_fused_gate);
  m.def(