From 611720919d0f6cf8a481f19bbd4046dcba9a9130 Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Fri, 11 Apr 2025 20:48:24 -0700
Subject: [PATCH] fix: use deepgemm only on hopper (#5310)

---
 python/sglang/srt/layers/quantization/fp8_kernel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/quantization/fp8_kernel.py b/python/sglang/srt/layers/quantization/fp8_kernel.py
index 535d4ecf6..43db8c79a 100644
--- a/python/sglang/srt/layers/quantization/fp8_kernel.py
+++ b/python/sglang/srt/layers/quantization/fp8_kernel.py
@@ -45,7 +45,7 @@ if _is_cuda:
     from sgl_kernel import sgl_per_token_group_quant_fp8, sgl_per_token_quant_fp8

     sm_version = get_device_sm()
-    if sm_version >= 90 and get_bool_env_var("SGL_ENABLE_JIT_DEEPGEMM", default="true"):
+    if sm_version == 90 and get_bool_env_var("SGL_ENABLE_JIT_DEEPGEMM", default="true"):
         _enable_jit_deepgemm = True