From 0917c5da8cf47ce8a1117b03bda9ff30a7e25d64 Mon Sep 17 00:00:00 2001
From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
Date: Tue, 21 Oct 2025 07:38:35 +0800
Subject: [PATCH] Support mixing cutedsl and deepgemm backend (#11807)

---
 python/sglang/srt/layers/moe/ep_moe/layer.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/moe/ep_moe/layer.py b/python/sglang/srt/layers/moe/ep_moe/layer.py
index d05b24098..a94fafd70 100644
--- a/python/sglang/srt/layers/moe/ep_moe/layer.py
+++ b/python/sglang/srt/layers/moe/ep_moe/layer.py
@@ -191,11 +191,15 @@ class DeepEPMoE(FusedMoE):
             assert deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM and self.use_fp8_w8a8
             return self.forward_deepgemm_contiguous(dispatch_output)
         elif DispatchOutputChecker.format_is_deepep_ll(dispatch_output):
-            if get_moe_runner_backend().is_flashinfer_cutedsl():
+            if (
+                get_moe_runner_backend().is_flashinfer_cutedsl()
+                and self.quant_config.get_name() == "modelopt_fp4"
+            ):
                 return self.forward_flashinfer_cutedsl(
                     dispatch_output, down_gemm_overlap_args=down_gemm_overlap_args
                 )
             assert deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM and self.use_fp8_w8a8
+            assert down_gemm_overlap_args is None
             return self.forward_deepgemm_masked(dispatch_output)
         else:
             raise ValueError(