Update sgl-kernel and remove fast hadamard dependency (#11844)
This commit is contained in:
@@ -9,7 +9,7 @@ ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
|
|||||||
ARG FLASHMLA_COMMIT=1408756a88e52a25196b759eaf8db89d2b51b5a1
|
ARG FLASHMLA_COMMIT=1408756a88e52a25196b759eaf8db89d2b51b5a1
|
||||||
ARG FAST_HADAMARD_TRANSFORM_COMMIT=7fd811c2b47f63b0b08d2582619f939e14dad77c
|
ARG FAST_HADAMARD_TRANSFORM_COMMIT=7fd811c2b47f63b0b08d2582619f939e14dad77c
|
||||||
ARG CMAKE_BUILD_PARALLEL_LEVEL=2
|
ARG CMAKE_BUILD_PARALLEL_LEVEL=2
|
||||||
ARG SGL_KERNEL_VERSION=0.3.15
|
ARG SGL_KERNEL_VERSION=0.3.16.post3
|
||||||
ENV DEBIAN_FRONTEND=noninteractive \
|
ENV DEBIAN_FRONTEND=noninteractive \
|
||||||
CUDA_HOME=/usr/local/cuda \
|
CUDA_HOME=/usr/local/cuda \
|
||||||
GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
|
GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
|
||||||
@@ -152,14 +152,6 @@ RUN if [ "$TARGETARCH" = "amd64" ]; then \
|
|||||||
pip install -v . ; \
|
pip install -v . ; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Install fast-hadamard-transform
|
|
||||||
RUN if [ "$TARGETARCH" = "amd64" ]; then \
|
|
||||||
git clone https://github.com/Dao-AILab/fast-hadamard-transform && \
|
|
||||||
cd fast-hadamard-transform && \
|
|
||||||
git checkout ${FAST_HADAMARD_TRANSFORM_COMMIT} && \
|
|
||||||
pip install . ; \
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Python tools
|
# Python tools
|
||||||
RUN python3 -m pip install --no-cache-dir \
|
RUN python3 -m pip install --no-cache-dir \
|
||||||
datamodel_code_generator \
|
datamodel_code_generator \
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ dependencies = [
|
|||||||
"scipy",
|
"scipy",
|
||||||
"sentencepiece",
|
"sentencepiece",
|
||||||
"setproctitle",
|
"setproctitle",
|
||||||
"sgl-kernel==0.3.15",
|
"sgl-kernel==0.3.16.post3",
|
||||||
"soundfile==0.13.1",
|
"soundfile==0.13.1",
|
||||||
"tiktoken",
|
"tiktoken",
|
||||||
"timm==1.0.16",
|
"timm==1.0.16",
|
||||||
|
|||||||
@@ -693,7 +693,7 @@ def _set_envs_and_config(server_args: ServerArgs):
|
|||||||
if _is_cuda and not get_bool_env_var("SGLANG_SKIP_SGL_KERNEL_VERSION_CHECK"):
|
if _is_cuda and not get_bool_env_var("SGLANG_SKIP_SGL_KERNEL_VERSION_CHECK"):
|
||||||
assert_pkg_version(
|
assert_pkg_version(
|
||||||
"sgl-kernel",
|
"sgl-kernel",
|
||||||
"0.3.15",
|
"0.3.16.post3",
|
||||||
"Please reinstall the latest version with `pip install sgl-kernel --force-reinstall`",
|
"Please reinstall the latest version with `pip install sgl-kernel --force-reinstall`",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ class BaseIndexerMetadata(ABC):
|
|||||||
|
|
||||||
def rotate_activation(x: torch.Tensor) -> torch.Tensor:
|
def rotate_activation(x: torch.Tensor) -> torch.Tensor:
|
||||||
assert x.dtype == torch.bfloat16
|
assert x.dtype == torch.bfloat16
|
||||||
from fast_hadamard_transform import hadamard_transform
|
from sgl_kernel import hadamard_transform
|
||||||
|
|
||||||
hidden_size = x.size(-1)
|
hidden_size = x.size(-1)
|
||||||
assert (
|
assert (
|
||||||
|
|||||||
@@ -96,14 +96,6 @@ if [ "$RUN_DEEPSEEK_V32" = "1" ]; then
|
|||||||
git submodule update --init --recursive
|
git submodule update --init --recursive
|
||||||
FLASH_MLA_DISABLE_SM100=${FLASH_MLA_DISABLE_SM100} $PIP_CMD install -v . $PIP_INSTALL_SUFFIX --no-build-isolation
|
FLASH_MLA_DISABLE_SM100=${FLASH_MLA_DISABLE_SM100} $PIP_CMD install -v . $PIP_INSTALL_SUFFIX --no-build-isolation
|
||||||
cd ..
|
cd ..
|
||||||
|
|
||||||
# Install fast-hadamard-transform
|
|
||||||
FAST_HADAMARD_TRANSFORM_COMMIT="7fd811c2b47f63b0b08d2582619f939e14dad77c"
|
|
||||||
git clone https://github.com/Dao-AILab/fast-hadamard-transform
|
|
||||||
cd fast-hadamard-transform
|
|
||||||
git checkout ${FAST_HADAMARD_TRANSFORM_COMMIT}
|
|
||||||
$PIP_CMD install . $PIP_INSTALL_SUFFIX --no-build-isolation
|
|
||||||
cd ..
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Show current packages
|
# Show current packages
|
||||||
|
|||||||
Reference in New Issue
Block a user