From b2435be68275676f40720dcacac339545a768297 Mon Sep 17 00:00:00 2001 From: b8zhong Date: Mon, 15 Sep 2025 22:30:28 -0700 Subject: [PATCH] Cache the result of `is_blackwell` platform check (#10498) --- .../layers/quantization/deep_gemm_wrapper/configurer.py | 9 ++------- python/sglang/srt/utils.py | 1 + 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py index ecf7d1647..662c70c34 100644 --- a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py +++ b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py @@ -2,7 +2,7 @@ import logging import torch -from sglang.srt.utils import get_bool_env_var, get_device_sm +from sglang.srt.utils import get_bool_env_var, get_device_sm, is_blackwell logger = logging.getLogger(__name__) @@ -21,12 +21,7 @@ def _compute_enable_deep_gemm(): return get_bool_env_var("SGL_ENABLE_JIT_DEEPGEMM", default="true") -def _is_blackwell_arch() -> bool: - major, minor = torch.cuda.get_device_capability(torch.cuda.current_device()) - return major == 10 - - ENABLE_JIT_DEEPGEMM = _compute_enable_deep_gemm() -DEEPGEMM_BLACKWELL = ENABLE_JIT_DEEPGEMM and _is_blackwell_arch() +DEEPGEMM_BLACKWELL = ENABLE_JIT_DEEPGEMM and is_blackwell() DEEPGEMM_SCALE_UE8M0 = DEEPGEMM_BLACKWELL diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py index 0301a8dbc..2b32f6d73 100644 --- a/python/sglang/srt/utils.py +++ b/python/sglang/srt/utils.py @@ -167,6 +167,7 @@ is_ampere_with_cuda_12_3 = lambda: _check(8) is_hopper_with_cuda_12_3 = lambda: _check(9) +@lru_cache(maxsize=1) def is_blackwell(): if not is_cuda(): return False