diff --git a/python/sglang/srt/layers/moe/ep_moe/kernels.py b/python/sglang/srt/layers/moe/ep_moe/kernels.py
index 08660812d..d8e221d5c 100644
--- a/python/sglang/srt/layers/moe/ep_moe/kernels.py
+++ b/python/sglang/srt/layers/moe/ep_moe/kernels.py
@@ -1104,10 +1104,10 @@ def ep_gather(
     input_index: torch.Tensor,
     output_tensor: torch.Tensor,
 ):
-    BLOCK_D = 1024 if not is_in_ci() else 128  # block size of quantization
     num_warps = 2
     num_tokens = output_tensor.shape[0]
     hidden_size = input_tensor.shape[1]
+    BLOCK_D = 128 if hidden_size % 1024 != 0 else 1024  # block size of quantization
     assert hidden_size % BLOCK_D == 0
     grid = (triton.cdiv(hidden_size, BLOCK_D), min(num_tokens, 1024))
     _fwd_kernel_ep_gather[grid](
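
In effect, the patch stops keying the gather block size off the CI environment (`is_in_ci()`) and instead derives it from the tensor shape: `BLOCK_D` falls back to 128 whenever `hidden_size` is not a multiple of 1024, which keeps the `assert hidden_size % BLOCK_D == 0` check satisfiable for such models. Below is a minimal standalone sketch of that selection; the helper name `select_block_d` and the sample hidden sizes are hypothetical, and the fallback still assumes `hidden_size` is a multiple of 128, which the retained assert enforces.

```python
# Standalone sketch of the block-size selection introduced by this patch.
# select_block_d is a hypothetical helper, not part of kernels.py.
def select_block_d(hidden_size: int) -> int:
    # Prefer the wide 1024-element block; drop to 128 when the hidden
    # size is not a multiple of 1024 so the divisibility assert in
    # ep_gather can still pass.
    return 128 if hidden_size % 1024 != 0 else 1024


# Illustrative hidden sizes only: 4096 and 5120 keep the 1024 block,
# while 1536 triggers the fallback (1536 % 1024 != 0, but 1536 % 128 == 0).
for hidden_size in (4096, 5120, 1536):
    block_d = select_block_d(hidden_size)
    assert hidden_size % block_d == 0
    print(hidden_size, "->", block_d)
```

One consequence of this design is that the kernel launch configuration no longer depends on the runtime environment, only on the model's hidden size, so CI and production exercise the same code path for a given shape.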