From 71ab0dabe0de235148258250ddee0f4dba4fb896 Mon Sep 17 00:00:00 2001 From: HAI Date: Wed, 5 Mar 2025 10:56:51 -0800 Subject: [PATCH] Fix the moe padding conditional logic (#4081) --- python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py index 92f46f009..bb39e2d9d 100644 --- a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +++ b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe.py @@ -18,6 +18,7 @@ from sglang.srt.layers.quantization.fp8_kernel import per_token_group_quant_fp8 from sglang.srt.layers.quantization.int8_kernel import per_token_group_quant_int8 from sglang.srt.utils import ( direct_register_custom_op, + get_bool_env_var, get_device_name, is_cuda_available, is_hip, @@ -941,7 +942,11 @@ def fused_experts_impl( no_combine: bool = False, ): padded_size = padding_size - if not use_fp8_w8a8 or not use_int8_w8a8 or block_shape is not None: + if ( + not (use_fp8_w8a8 or use_int8_w8a8) + or block_shape is not None + or (is_hip_ and get_bool_env_var("CK_MOE")) + ): padded_size = 0 # Check constraints.