Cache the result of is_blackwell platform check (#10498)

2025-09-15 22:30:28 -07:00
parent 5fe39e85a2
commit b2435be682
2 changed files with 3 additions and 7 deletions
--- a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py
+++ b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py
@@ -2,7 +2,7 @@ import logging

 import torch

-from sglang.srt.utils import get_bool_env_var, get_device_sm
+from sglang.srt.utils import get_bool_env_var, get_device_sm, is_blackwell

 logger = logging.getLogger(__name__)

@@ -21,12 +21,7 @@ def _compute_enable_deep_gemm():
    return get_bool_env_var("SGL_ENABLE_JIT_DEEPGEMM", default="true")


-def _is_blackwell_arch() -> bool:
-    major, minor = torch.cuda.get_device_capability(torch.cuda.current_device())
-    return major == 10
-
-
 ENABLE_JIT_DEEPGEMM = _compute_enable_deep_gemm()

-DEEPGEMM_BLACKWELL = ENABLE_JIT_DEEPGEMM and _is_blackwell_arch()
+DEEPGEMM_BLACKWELL = ENABLE_JIT_DEEPGEMM and is_blackwell()
 DEEPGEMM_SCALE_UE8M0 = DEEPGEMM_BLACKWELL
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -167,6 +167,7 @@ is_ampere_with_cuda_12_3 = lambda: _check(8)
 is_hopper_with_cuda_12_3 = lambda: _check(9)


+@lru_cache(maxsize=1)
 def is_blackwell():
    if not is_cuda():
        return False