diff --git a/python/sglang/srt/layers/moe/ep_moe/layer.py b/python/sglang/srt/layers/moe/ep_moe/layer.py index d05b24098..a94fafd70 100644 --- a/python/sglang/srt/layers/moe/ep_moe/layer.py +++ b/python/sglang/srt/layers/moe/ep_moe/layer.py @@ -191,11 +191,15 @@ class DeepEPMoE(FusedMoE): assert deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM and self.use_fp8_w8a8 return self.forward_deepgemm_contiguous(dispatch_output) elif DispatchOutputChecker.format_is_deepep_ll(dispatch_output): - if get_moe_runner_backend().is_flashinfer_cutedsl(): + if ( + get_moe_runner_backend().is_flashinfer_cutedsl() + and self.quant_config.get_name() == "modelopt_fp4" + ): return self.forward_flashinfer_cutedsl( dispatch_output, down_gemm_overlap_args=down_gemm_overlap_args ) assert deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM and self.use_fp8_w8a8 + assert down_gemm_overlap_args is None return self.forward_deepgemm_masked(dispatch_output) else: raise ValueError(