From b2435be68275676f40720dcacac339545a768297 Mon Sep 17 00:00:00 2001
From: b8zhong <b8zhong@uwaterloo.ca>
Date: Mon, 15 Sep 2025 22:30:28 -0700
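Subject: [PATCH] Cache the result of `is_blackwell` platform check (#10498)

Decorate `is_blackwell` in `sglang.srt.utils` with `lru_cache(maxsize=1)` so
its result is computed on the first call and reused afterwards. Remove the
private `_is_blackwell_arch` helper from the DeepGEMM configurer and use the
shared `is_blackwell` utility when computing `DEEPGEMM_BLACKWELL`.
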
---
 .../layers/quantization/deep_gemm_wrapper/configurer.py  | 9 ++-------
 python/sglang/srt/utils.py                               | 1 +
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py
index ecf7d1647..662c70c34 100644
--- a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py
+++ b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py
@@ -2,7 +2,7 @@ import logging
 
 import torch
 
-from sglang.srt.utils import get_bool_env_var, get_device_sm
+from sglang.srt.utils import get_bool_env_var, get_device_sm, is_blackwell
 
 logger = logging.getLogger(__name__)
 
@@ -21,12 +21,7 @@ def _compute_enable_deep_gemm():
     return get_bool_env_var("SGL_ENABLE_JIT_DEEPGEMM", default="true")
 
 
-def _is_blackwell_arch() -> bool:
-    major, minor = torch.cuda.get_device_capability(torch.cuda.current_device())
-    return major == 10
-
-
 ENABLE_JIT_DEEPGEMM = _compute_enable_deep_gemm()
 
-DEEPGEMM_BLACKWELL = ENABLE_JIT_DEEPGEMM and _is_blackwell_arch()
+DEEPGEMM_BLACKWELL = ENABLE_JIT_DEEPGEMM and is_blackwell()
 DEEPGEMM_SCALE_UE8M0 = DEEPGEMM_BLACKWELL
diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py
index 0301a8dbc..2b32f6d73 100644
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -167,6 +167,7 @@ is_ampere_with_cuda_12_3 = lambda: _check(8)
 is_hopper_with_cuda_12_3 = lambda: _check(9)
 
 
+@lru_cache(maxsize=1)
 def is_blackwell():
     if not is_cuda():
         return False