Rename the dev Docker image tags and move the ARM jobs to a new runner.

What this patch does, file by file:

  .github/workflows/release-docker-dev.yml
    * x86 dev image is pushed as `dev-x86` (was `dev`).
    * Job `build-blackwell-arm` becomes `build-dev-arm`, runs on
      `sgl-kernel-release-node-arm` (was `labubu`), uses BUILD_TYPE
      `blackwell_aarch64`, and pushes tag `dev-arm64`; the `-arm64` suffix
      is no longer appended in the `docker buildx build` command.
    * The manifest is published as `dev` (was `dev-manifest`) and combines
      `dev-x86` with `dev-arm64`.
  .github/workflows/release-docker.yml
    * Quote style switched from single to double quotes.
    * `publish-arm64` uses BUILD_TYPE `blackwell_aarch64` and the
      `sgl-kernel-release-node-arm` runner.
  .github/workflows/release-whl-kernel.yml
    * `build-cu129-aarch64` runs on `sgl-kernel-release-node-arm`.
  docker/Dockerfile
    * The DeepEP fork is selected by BUILD_TYPE = `blackwell_aarch64` alone;
      the `uname -m` check is dropped.
    * CUDA_ARCH for the NVSHMEM build is exported as "90;100;120" for every
      BUILD_TYPE.

NOTE(review): the line breaks, blank context lines and indentation below were
reconstructed from the hunk line counts and the conventional layout of these
files. Verify against the target tree before applying.
NOTE(review): CUDA_ARCH is now "90;100;120" even for the CUDA 12.6.1 / `all`
variant in release-docker.yml. Confirm that toolchain accepts the 100 and
120 targets.
NOTE(review): the step name "Build and Push Blackwell Image (ARM)" is left
unchanged although the job is now `build-dev-arm`. Confirm this is intended.

diff --git a/.github/workflows/release-docker-dev.yml b/.github/workflows/release-docker-dev.yml
index b47f998c2..4b4701099 100644
--- a/.github/workflows/release-docker-dev.yml
+++ b/.github/workflows/release-docker-dev.yml
@@ -3,7 +3,7 @@ name: Build and Push Development Docker Images
 on:
   workflow_dispatch:
   schedule:
-    - cron: '0 0 * * *'
+    - cron: "0 0 * * *"
 
 jobs:
   build-dev-x86:
@@ -14,7 +14,7 @@ jobs:
         variant:
           - version: 12.9.1
             type: all
-            tag: dev
+            tag: dev-x86
     steps:
       - name: Delete huge unnecessary tools folder
         run: rm -rf /opt/hostedtoolcache
@@ -46,15 +46,15 @@ jobs:
         run: |
           docker buildx build --platform linux/amd64 --push -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.variant.version }} --build-arg BUILD_TYPE=${{ matrix.variant.type }} --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) -t lmsysorg/sglang:${{ matrix.variant.tag }} --no-cache .
 
-  build-blackwell-arm:
+  build-dev-arm:
     if: ${{ github.repository == 'sgl-project/sglang' }}
-    runs-on: labubu
+    runs-on: sgl-kernel-release-node-arm
     strategy:
       matrix:
         variant:
           - version: 12.9.1
-            type: blackwell_aarch
-            tag: blackwell-cu129
+            type: blackwell_aarch64
+            tag: dev-arm64
     steps:
       - name: Delete huge unnecessary tools folder
         run: rm -rf /opt/hostedtoolcache
@@ -84,19 +84,18 @@ jobs:
 
       - name: Build and Push Blackwell Image (ARM)
         run: |
-          docker buildx build --platform linux/arm64 --push -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.variant.version }} --build-arg BUILD_TYPE=${{ matrix.variant.type }} --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) -t lmsysorg/sglang:${{ matrix.variant.tag }}-arm64 --no-cache .
-
+          docker buildx build --platform linux/arm64 --push -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.variant.version }} --build-arg BUILD_TYPE=${{ matrix.variant.type }} --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) -t lmsysorg/sglang:${{ matrix.variant.tag }} --no-cache .
 
   create-manifests:
     runs-on: ubuntu-22.04
-    needs: [build-dev-x86, build-blackwell-arm]
+    needs: [build-dev-x86, build-dev-arm]
     if: ${{ github.repository == 'sgl-project/sglang' }}
     strategy:
       matrix:
         variant:
-          - tag: dev-manifest
-            x86_tag: dev
-            arm64_tag: blackwell-cu129-arm64
+          - tag: dev
+            x86_tag: dev-x86
+            arm64_tag: dev-arm64
     steps:
       - uses: docker/setup-buildx-action@v3
       - uses: docker/login-action@v2
diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml
index 811fedb26..7b5a6dda7 100644
--- a/.github/workflows/release-docker.yml
+++ b/.github/workflows/release-docker.yml
@@ -10,16 +10,16 @@ on:
 jobs:
   publish-x86:
     if: github.repository == 'sgl-project/sglang'
-    environment: 'prod'
+    environment: "prod"
     strategy:
       matrix:
         variant:
-          - cuda_version: '12.6.1'
-            build_type: 'all'
-          - cuda_version: '12.8.1'
-            build_type: 'blackwell'
-          - cuda_version: '12.9.1'
-            build_type: 'blackwell'
+          - cuda_version: "12.6.1"
+            build_type: "all"
+          - cuda_version: "12.8.1"
+            build_type: "blackwell"
+          - cuda_version: "12.9.1"
+            build_type: "blackwell"
     runs-on: nvidia
     steps:
       - name: Delete huge unnecessary tools folder
@@ -82,13 +82,13 @@ jobs:
 
   publish-arm64:
     if: github.repository == 'sgl-project/sglang'
-    environment: 'prod'
+    environment: "prod"
     strategy:
       matrix:
         variant:
-          - cuda_version: '12.9.1'
-            build_type: 'blackwell_aarch'
-    runs-on: labubu
+          - cuda_version: "12.9.1"
+            build_type: "blackwell_aarch64"
+    runs-on: sgl-kernel-release-node-arm
     steps:
       - name: Delete huge unnecessary tools folder
         run: rm -rf /opt/hostedtoolcache
diff --git a/.github/workflows/release-whl-kernel.yml b/.github/workflows/release-whl-kernel.yml
index 75f94996f..c80fd1fd1 100644
--- a/.github/workflows/release-whl-kernel.yml
+++ b/.github/workflows/release-whl-kernel.yml
@@ -206,7 +206,7 @@ jobs:
 
   build-cu129-aarch64:
     if: github.repository == 'sgl-project/sglang'
-    runs-on: labubu
+    runs-on: sgl-kernel-release-node-arm
     strategy:
       matrix:
         python-version: ["3.10"]
diff --git a/docker/Dockerfile b/docker/Dockerfile
index e774b1f57..a6229288d 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -93,9 +93,10 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
  && FLASHINFER_LOGGING_LEVEL=warning python3 -m flashinfer --download-cubin
 
 
-# Download source files
+# Download NVSHMEM source files
+# We use Tom's DeepEP fork for GB200 for now
 RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
-    if [ "$BUILD_TYPE" = "blackwell_aarch" ] && [ "$(uname -m)" = "aarch64" ]; then \
+    if [ "$BUILD_TYPE" = "blackwell_aarch64" ]; then \
       git clone https://github.com/fzyzcjy/DeepEP.git \
       && cd DeepEP && git checkout 1b14ad661c7640137fcfe93cccb2694ede1220b0 && sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' csrc/kernels/configs.cuh && cd .. ; \
     else \
@@ -108,7 +109,7 @@ RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/sour
 
 # Build and install NVSHMEM
 RUN cd /sgl-workspace/nvshmem && \
-    if [ "$BUILD_TYPE" = "blackwell" ] || [ "$BUILD_TYPE" = "blackwell_aarch" ]; then CUDA_ARCH="90;100;120"; else CUDA_ARCH="90"; fi && \
+    export CUDA_ARCH="90;100;120" && \
     NVSHMEM_SHMEM_SUPPORT=0 \
     NVSHMEM_UCX_SUPPORT=0 \
     NVSHMEM_USE_NCCL=0 \