Fix fast decode plan for flashinfer v0.4.0rc1 and upgrade sgl-kernel to 0.3.11 (#10634)

Co-authored-by: zhyncs <me@zhyncs.com>
2025-09-19 01:25:29 -07:00
parent 4f2055ad56
commit 3fa3c22ae2
5 changed files with 10 additions and 7 deletions
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -85,7 +85,7 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
 && python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps \
 && python3 -m flashinfer --download-cubin \
 && if [ "$CUDA_VERSION" = "12.6.1" ]; then \
-      python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.10/sgl_kernel-0.3.10+cu124-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ; \
+      python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.11/sgl_kernel-0.3.11+cu124-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ; \
    fi

 # Download source files