Fix Dockerfile not installing correct version of DeepEP for arm build (#11773)
This commit is contained in:
4
.github/workflows/release-docker-dev.yml
vendored
4
.github/workflows/release-docker-dev.yml
vendored
@@ -15,11 +15,13 @@ jobs:
|
|||||||
- runner: x64-docker-build-node
|
- runner: x64-docker-build-node
|
||||||
platform: linux/amd64
|
platform: linux/amd64
|
||||||
build_type: all
|
build_type: all
|
||||||
|
grace_blackwell: 0
|
||||||
tag: dev-x86
|
tag: dev-x86
|
||||||
version: 12.9.1
|
version: 12.9.1
|
||||||
- runner: arm-docker-build-node
|
- runner: arm-docker-build-node
|
||||||
platform: linux/arm64
|
platform: linux/arm64
|
||||||
build_type: all
|
build_type: all
|
||||||
|
grace_blackwell: 1
|
||||||
tag: dev-arm64
|
tag: dev-arm64
|
||||||
version: 12.9.1
|
version: 12.9.1
|
||||||
steps:
|
steps:
|
||||||
@@ -51,7 +53,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Build and Push Dev Image
|
- name: Build and Push Dev Image
|
||||||
run: |
|
run: |
|
||||||
docker buildx build --platform ${{ matrix.platform }} --push -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) -t lmsysorg/sglang:${{ matrix.tag }} --no-cache .
|
docker buildx build --platform ${{ matrix.platform }} --push -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} --build-arg GRACE_BLACKWELL=${{ matrix.grace_blackwell }} --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) -t lmsysorg/sglang:${{ matrix.tag }} --no-cache .
|
||||||
|
|
||||||
create-manifests:
|
create-manifests:
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-22.04
|
||||||
|
|||||||
4
.github/workflows/release-docker.yml
vendored
4
.github/workflows/release-docker.yml
vendored
@@ -16,6 +16,7 @@ jobs:
|
|||||||
variant:
|
variant:
|
||||||
- cuda_version: "12.9.1"
|
- cuda_version: "12.9.1"
|
||||||
build_type: "all"
|
build_type: "all"
|
||||||
|
grace_blackwell: 0
|
||||||
runs-on: x64-docker-build-node
|
runs-on: x64-docker-build-node
|
||||||
steps:
|
steps:
|
||||||
- name: Delete huge unnecessary tools folder
|
- name: Delete huge unnecessary tools folder
|
||||||
@@ -55,6 +56,7 @@ jobs:
|
|||||||
-f docker/Dockerfile \
|
-f docker/Dockerfile \
|
||||||
--build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
|
--build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
|
||||||
--build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
|
--build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
|
||||||
|
--build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
|
||||||
-t lmsysorg/sglang:${tag} \
|
-t lmsysorg/sglang:${tag} \
|
||||||
--no-cache \
|
--no-cache \
|
||||||
.
|
.
|
||||||
@@ -67,6 +69,7 @@ jobs:
|
|||||||
variant:
|
variant:
|
||||||
- cuda_version: "12.9.1"
|
- cuda_version: "12.9.1"
|
||||||
build_type: "all"
|
build_type: "all"
|
||||||
|
grace_blackwell: 1
|
||||||
runs-on: arm-docker-build-node
|
runs-on: arm-docker-build-node
|
||||||
steps:
|
steps:
|
||||||
- name: Delete huge unnecessary tools folder
|
- name: Delete huge unnecessary tools folder
|
||||||
@@ -95,6 +98,7 @@ jobs:
|
|||||||
-f docker/Dockerfile \
|
-f docker/Dockerfile \
|
||||||
--build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
|
--build-arg CUDA_VERSION=${{ matrix.variant.cuda_version }} \
|
||||||
--build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
|
--build-arg BUILD_TYPE=${{ matrix.variant.build_type }} \
|
||||||
|
--build-arg GRACE_BLACKWELL=${{ matrix.variant.grace_blackwell }} \
|
||||||
-t lmsysorg/sglang:${tag} \
|
-t lmsysorg/sglang:${tag} \
|
||||||
--no-cache \
|
--no-cache \
|
||||||
.
|
.
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ ARG CUDA_VERSION=12.9.1
|
|||||||
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 AS base
|
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 AS base
|
||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
|
|
||||||
|
ARG GRACE_BLACKWELL=0
|
||||||
ARG BUILD_TYPE=all
|
ARG BUILD_TYPE=all
|
||||||
ARG BRANCH_TYPE=remote
|
ARG BRANCH_TYPE=remote
|
||||||
ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
|
ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
|
||||||
@@ -99,7 +100,7 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
|
|||||||
# Download NVSHMEM source files
|
# Download NVSHMEM source files
|
||||||
# We use Tom's DeepEP fork for GB200 for now
|
# We use Tom's DeepEP fork for GB200 for now
|
||||||
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
|
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
|
||||||
if [ "$BUILD_TYPE" = "blackwell_aarch64" ]; then \
|
if [ "$GRACE_BLACKWELL" = "1" ]; then \
|
||||||
git clone https://github.com/fzyzcjy/DeepEP.git \
|
git clone https://github.com/fzyzcjy/DeepEP.git \
|
||||||
&& cd DeepEP && git checkout 1b14ad661c7640137fcfe93cccb2694ede1220b0 && sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh && cd .. ; \
|
&& cd DeepEP && git checkout 1b14ad661c7640137fcfe93cccb2694ede1220b0 && sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh && cd .. ; \
|
||||||
else \
|
else \
|
||||||
@@ -112,7 +113,7 @@ RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/sour
|
|||||||
|
|
||||||
# Build and install NVSHMEM
|
# Build and install NVSHMEM
|
||||||
RUN cd /sgl-workspace/nvshmem && \
|
RUN cd /sgl-workspace/nvshmem && \
|
||||||
if [ "$BUILD_TYPE" = "blackwell" ] || [ "$BUILD_TYPE" = "blackwell_aarch" ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
|
if [ "$GRACE_BLACKWELL" = "1" ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
|
||||||
NVSHMEM_SHMEM_SUPPORT=0 \
|
NVSHMEM_SHMEM_SUPPORT=0 \
|
||||||
NVSHMEM_UCX_SUPPORT=0 \
|
NVSHMEM_UCX_SUPPORT=0 \
|
||||||
NVSHMEM_USE_NCCL=0 \
|
NVSHMEM_USE_NCCL=0 \
|
||||||
|
|||||||
Reference in New Issue
Block a user