From 349bb2c92af41f7b6937a5ecb8a5b1d4d18b0b6e Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Sun, 15 Jun 2025 10:24:54 +0800 Subject: [PATCH] Fix error when disabling new DeepGEMM (#7198) --- python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py | 6 ++++-- python/sglang/srt/models/deepseek_v2.py | 5 ++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py index 33b7d6929..339f2e8bc 100644 --- a/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py +++ b/python/sglang/srt/layers/moe/ep_moe/token_dispatcher.py @@ -584,8 +584,10 @@ class _DeepEPDispatcherImplLowLatency(_DeepEPDispatcherImplBase): use_fp8=use_fp8, async_finish=not self.return_recv_hook, return_recv_hook=self.return_recv_hook, - round_scale=deep_gemm_wrapper.DEEPGEMM_V202506, - use_ue8m0=deep_gemm_wrapper.DEEPGEMM_V202506, + round_scale=deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM + and deep_gemm_wrapper.DEEPGEMM_V202506, + use_ue8m0=deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM + and deep_gemm_wrapper.DEEPGEMM_V202506, ) ) return packed_recv_hidden, packed_recv_count, event, hook diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 352480f2d..0ebc53442 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -1914,7 +1914,10 @@ class DeepseekV2ForCausalLM(nn.Module): self_attn.w_vc = bind_or_assign(self_attn.w_vc, w_vc.contiguous()) self_attn.use_deep_gemm_bmm = True - if deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0: + if ( + deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM + and deep_gemm_wrapper.DEEPGEMM_SCALE_UE8M0 + ): self._weight_requant_ue8m0() def _weight_requant_ue8m0(self):