Fix fast decode plan for flashinfer v0.4.0rc1 and upgrade sgl-kernel 0.3.11 (#10634)
Co-authored-by: zhyncs <me@zhyncs.com>
This commit is contained in:
@@ -85,7 +85,7 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
|
||||
&& python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps \
|
||||
&& python3 -m flashinfer --download-cubin \
|
||||
&& if [ "$CUDA_VERSION" = "12.6.1" ]; then \
|
||||
python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.10/sgl_kernel-0.3.10+cu124-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ; \
|
||||
python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.11/sgl_kernel-0.3.11+cu124-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ; \
|
||||
fi
|
||||
|
||||
# Download source files
|
||||
|
||||
Reference in New Issue
Block a user