Fix the moe padding conditional logic (#4081)
@@ -18,6 +18,7 @@ from sglang.srt.layers.quantization.fp8_kernel import per_token_group_quant_fp8
 from sglang.srt.layers.quantization.int8_kernel import per_token_group_quant_int8
 from sglang.srt.utils import (
     direct_register_custom_op,
+    get_bool_env_var,
     get_device_name,
     is_cuda_available,
     is_hip,
@@ -941,7 +942,11 @@ def fused_experts_impl(
     no_combine: bool = False,
 ):
     padded_size = padding_size
-    if not use_fp8_w8a8 or not use_int8_w8a8 or block_shape is not None:
+    if (
+        not (use_fp8_w8a8 or use_int8_w8a8)
+        or block_shape is not None
+        or (is_hip_ and get_bool_env_var("CK_MOE"))
+    ):
         padded_size = 0
 
     # Check constraints.
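
Why the old condition was wrong: by De Morgan's law, `not use_fp8_w8a8 or not use_int8_w8a8` is equivalent to `not (use_fp8_w8a8 and use_int8_w8a8)`, which is True whenever the two flags are not both set. Since fp8 and int8 quantization are presumably mutually exclusive modes, that expression was effectively always True, so `padded_size` was forced to 0 even in runs that should use padding. The fixed condition zeroes the padding only when neither quantization flag is set, when `block_shape` is given, or on HIP when the `CK_MOE` env var is enabled. Below is a minimal, self-contained sketch (not part of the commit; the `ck_moe` parameter stands in for `get_bool_env_var("CK_MOE")`) that tabulates both conditions over all flag combinations:

from itertools import product

def old_condition(use_fp8_w8a8, use_int8_w8a8, block_shape=None):
    # Original (buggy) expression from fused_experts_impl.
    return not use_fp8_w8a8 or not use_int8_w8a8 or block_shape is not None

def new_condition(use_fp8_w8a8, use_int8_w8a8, block_shape=None,
                  is_hip_=False, ck_moe=False):
    # Fixed expression; ck_moe is a hypothetical stand-in for
    # get_bool_env_var("CK_MOE") so the sketch needs no environment.
    return (
        not (use_fp8_w8a8 or use_int8_w8a8)
        or block_shape is not None
        or (is_hip_ and ck_moe)
    )

for fp8, int8 in product([False, True], repeat=2):
    print(
        f"fp8={fp8!s:<5} int8={int8!s:<5} "
        f"old disables padding: {old_condition(fp8, int8)!s:<5} "
        f"new disables padding: {new_condition(fp8, int8)}"
    )

The table shows the old expression disabling padding in every case except the unrealistic fp8=True, int8=True combination, while the fixed expression disables it only when neither flag is set (the block-quant and HIP CK_MOE escapes aside).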