From f58b929a5185029c88cea378901d9e011380f173 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Sun, 13 Apr 2025 00:45:59 -0700 Subject: [PATCH] chore: upgrade sgl-kernel 0.0.8.post3 (#5342) --- python/pyproject.toml | 2 +- python/sglang/srt/layers/quantization/fp8_kernel.py | 2 +- scripts/ci_install_dependency.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index cbeb42163..96803787d 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -47,7 +47,7 @@ runtime_common = [ srt = [ "sglang[runtime_common]", - "sgl-kernel==0.0.8", + "sgl-kernel==0.0.8.post3", "flashinfer_python==0.2.3", "torch==2.5.1", "torchvision==0.20.1", diff --git a/python/sglang/srt/layers/quantization/fp8_kernel.py b/python/sglang/srt/layers/quantization/fp8_kernel.py index 43db8c79a..72ec99c6f 100644 --- a/python/sglang/srt/layers/quantization/fp8_kernel.py +++ b/python/sglang/srt/layers/quantization/fp8_kernel.py @@ -41,7 +41,7 @@ fp8_type_ = torch.float8_e4m3fnuz if _is_hip else torch.float8_e4m3fn _is_cuda = is_cuda() if _is_cuda: - import deep_gemm # `pip install "sgl-kernel>=0.0.4.post3"` + import deep_gemm from sgl_kernel import sgl_per_token_group_quant_fp8, sgl_per_token_quant_fp8 sm_version = get_device_sm() diff --git a/scripts/ci_install_dependency.sh b/scripts/ci_install_dependency.sh index d7b4c89b4..80cda93ce 100755 --- a/scripts/ci_install_dependency.sh +++ b/scripts/ci_install_dependency.sh @@ -20,7 +20,7 @@ pip install --upgrade pip # Install flashinfer and sgl-kernel pip install flashinfer_python==0.2.3 --find-links ${FLASHINFER_REPO} --no-cache-dir -pip install sgl-kernel==0.0.8 --no-cache-dir +pip install sgl-kernel==0.0.8.post3 --no-cache-dir # Install the main package pip install -e "python[all]" --find-links ${FLASHINFER_REPO}