From 5c08a36cbfaeefab461ef7c42d897acae568b97a Mon Sep 17 00:00:00 2001 From: hzh0425 Date: Thu, 17 Jul 2025 12:33:29 +0800 Subject: [PATCH] [Fix] ensure DeepGEMM is only enabled for FP8_W8A8 models (#8110) --- python/sglang/srt/layers/moe/ep_moe/layer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/sglang/srt/layers/moe/ep_moe/layer.py b/python/sglang/srt/layers/moe/ep_moe/layer.py index 353f131c9..e8bfadfb6 100644 --- a/python/sglang/srt/layers/moe/ep_moe/layer.py +++ b/python/sglang/srt/layers/moe/ep_moe/layer.py @@ -1272,6 +1272,12 @@ class DeepEPMoE(EPMoE): routed_scaling_factor=routed_scaling_factor, ) self.deepep_mode = deepep_mode + if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM: + assert self.use_fp8_w8a8, ( + "DeepGEMM requires an fp8_w8a8 model; " + "alternatively, you can disable DeepGEMM by turning off the ENABLE_JIT_DEEPGEMM environment variable." + ) + if self.deepep_mode.enable_low_latency(): assert ( deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM