From f58b929a5185029c88cea378901d9e011380f173 Mon Sep 17 00:00:00 2001
From: Yineng Zhang <me@zhyncs.com>
Date: Sun, 13 Apr 2025 00:45:59 -0700
Subject: [PATCH] chore: upgrade sgl-kernel 0.0.8.post3 (#5342)

---
 python/pyproject.toml                               | 2 +-
 python/sglang/srt/layers/quantization/fp8_kernel.py | 2 +-
 scripts/ci_install_dependency.sh                    | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index cbeb42163..96803787d 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -47,7 +47,7 @@ runtime_common = [
 
 srt = [
     "sglang[runtime_common]",
-    "sgl-kernel==0.0.8",
+    "sgl-kernel==0.0.8.post3",
     "flashinfer_python==0.2.3",
     "torch==2.5.1",
     "torchvision==0.20.1",
diff --git a/python/sglang/srt/layers/quantization/fp8_kernel.py b/python/sglang/srt/layers/quantization/fp8_kernel.py
index 43db8c79a..72ec99c6f 100644
--- a/python/sglang/srt/layers/quantization/fp8_kernel.py
+++ b/python/sglang/srt/layers/quantization/fp8_kernel.py
@@ -41,7 +41,7 @@ fp8_type_ = torch.float8_e4m3fnuz if _is_hip else torch.float8_e4m3fn
 
 _is_cuda = is_cuda()
 if _is_cuda:
-    import deep_gemm  # `pip install "sgl-kernel>=0.0.4.post3"`
+    import deep_gemm
     from sgl_kernel import sgl_per_token_group_quant_fp8, sgl_per_token_quant_fp8
 
     sm_version = get_device_sm()
diff --git a/scripts/ci_install_dependency.sh b/scripts/ci_install_dependency.sh
index d7b4c89b4..80cda93ce 100755
--- a/scripts/ci_install_dependency.sh
+++ b/scripts/ci_install_dependency.sh
@@ -20,7 +20,7 @@ pip install --upgrade pip
 
 # Install flashinfer and sgl-kernel
 pip install flashinfer_python==0.2.3 --find-links ${FLASHINFER_REPO} --no-cache-dir
-pip install sgl-kernel==0.0.8 --no-cache-dir
+pip install sgl-kernel==0.0.8.post3 --no-cache-dir
 
 # Install the main package
 pip install -e "python[all]" --find-links ${FLASHINFER_REPO}