diff --git a/docker/Dockerfile b/docker/Dockerfile index 3e335a1be..acb9a06f1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -9,7 +9,7 @@ ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee ARG FLASHMLA_COMMIT=1408756a88e52a25196b759eaf8db89d2b51b5a1 ARG FAST_HADAMARD_TRANSFORM_COMMIT=7fd811c2b47f63b0b08d2582619f939e14dad77c ARG CMAKE_BUILD_PARALLEL_LEVEL=2 -ARG SGL_KERNEL_VERSION=0.3.15 +ARG SGL_KERNEL_VERSION=0.3.16.post3 ENV DEBIAN_FRONTEND=noninteractive \ CUDA_HOME=/usr/local/cuda \ GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \ @@ -152,14 +152,6 @@ RUN if [ "$TARGETARCH" = "amd64" ]; then \ pip install -v . ; \ fi -# Install fast-hadamard-transform -RUN if [ "$TARGETARCH" = "amd64" ]; then \ - git clone https://github.com/Dao-AILab/fast-hadamard-transform && \ - cd fast-hadamard-transform && \ - git checkout ${FAST_HADAMARD_TRANSFORM_COMMIT} && \ - pip install . ; \ - fi - # Python tools RUN python3 -m pip install --no-cache-dir \ datamodel_code_generator \ diff --git a/python/pyproject.toml b/python/pyproject.toml index 57954507e..2812d43c1 100755 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -55,7 +55,7 @@ dependencies = [ "scipy", "sentencepiece", "setproctitle", - "sgl-kernel==0.3.15", + "sgl-kernel==0.3.16.post3", "soundfile==0.13.1", "tiktoken", "timm==1.0.16", diff --git a/python/sglang/srt/entrypoints/engine.py b/python/sglang/srt/entrypoints/engine.py index 7107a611e..bee34adc6 100644 --- a/python/sglang/srt/entrypoints/engine.py +++ b/python/sglang/srt/entrypoints/engine.py @@ -693,7 +693,7 @@ def _set_envs_and_config(server_args: ServerArgs): if _is_cuda and not get_bool_env_var("SGLANG_SKIP_SGL_KERNEL_VERSION_CHECK"): assert_pkg_version( "sgl-kernel", - "0.3.15", + "0.3.16.post3", "Please reinstall the latest version with `pip install sgl-kernel --force-reinstall`", ) diff --git a/python/sglang/srt/layers/attention/nsa/nsa_indexer.py b/python/sglang/srt/layers/attention/nsa/nsa_indexer.py index 25a191e08..e0d452680 100644 --- a/python/sglang/srt/layers/attention/nsa/nsa_indexer.py +++ b/python/sglang/srt/layers/attention/nsa/nsa_indexer.py @@ -74,7 +74,7 @@ class BaseIndexerMetadata(ABC): def rotate_activation(x: torch.Tensor) -> torch.Tensor: assert x.dtype == torch.bfloat16 - from fast_hadamard_transform import hadamard_transform + from sgl_kernel import hadamard_transform hidden_size = x.size(-1) assert ( diff --git a/scripts/ci/ci_install_dependency.sh b/scripts/ci/ci_install_dependency.sh index c3a81aa28..ee72f77d9 100755 --- a/scripts/ci/ci_install_dependency.sh +++ b/scripts/ci/ci_install_dependency.sh @@ -96,14 +96,6 @@ if [ "$RUN_DEEPSEEK_V32" = "1" ]; then git submodule update --init --recursive FLASH_MLA_DISABLE_SM100=${FLASH_MLA_DISABLE_SM100} $PIP_CMD install -v . $PIP_INSTALL_SUFFIX --no-build-isolation cd .. - - # Install fast-hadamard-transform - FAST_HADAMARD_TRANSFORM_COMMIT="7fd811c2b47f63b0b08d2582619f939e14dad77c" - git clone https://github.com/Dao-AILab/fast-hadamard-transform - cd fast-hadamard-transform - git checkout ${FAST_HADAMARD_TRANSFORM_COMMIT} - $PIP_CMD install . $PIP_INSTALL_SUFFIX --no-build-isolation - cd .. fi # Show current packages