Support Blackwell DeepEP docker images (#6868)
This commit is contained in:
17
.github/workflows/release-docker-deepep.yml
vendored
17
.github/workflows/release-docker-deepep.yml
vendored
@@ -9,6 +9,17 @@ jobs:
|
|||||||
build-dev:
|
build-dev:
|
||||||
if: ${{ github.repository == 'sgl-project/sglang' }}
|
if: ${{ github.repository == 'sgl-project/sglang' }}
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
variant:
|
||||||
|
- base: lmsysorg/sglang:latest
|
||||||
|
tag: deepep
|
||||||
|
- base: lmsysorg/sglang:dev
|
||||||
|
tag: dev-deepep
|
||||||
|
- base: lmsysorg/sglang:blackwell
|
||||||
|
tag: blackwell-deepep
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -30,7 +41,7 @@ jobs:
|
|||||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||||
|
|
||||||
- name: Build and Push DeepEP Image
|
- name: Build and Push Docker Image
|
||||||
run: |
|
run: |
|
||||||
docker build . -f docker/Dockerfile.deepep -t lmsysorg/sglang:deepep --no-cache
|
docker build . -f docker/Dockerfile.deepep --build-arg BASE_IMAGE=${{ matrix.variant.base }} -t lmsysorg/sglang:${{ matrix.variant.tag }} --no-cache
|
||||||
docker push lmsysorg/sglang:deepep
|
docker push lmsysorg/sglang:${{ matrix.variant.tag }}
|
||||||
|
|||||||
36
.github/workflows/release-docker-dev-deepep.yml
vendored
36
.github/workflows/release-docker-dev-deepep.yml
vendored
@@ -1,36 +0,0 @@
|
|||||||
name: Build Dev-DeepEP Docker Image
|
|
||||||
|
|
||||||
on:
|
|
||||||
workflow_dispatch:
|
|
||||||
schedule:
|
|
||||||
- cron: '0 0 * * *'
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-dev:
|
|
||||||
if: ${{ github.repository == 'sgl-project/sglang' }}
|
|
||||||
runs-on: ubuntu-22.04
|
|
||||||
steps:
|
|
||||||
- name: Checkout repository
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Free disk space
|
|
||||||
uses: jlumbroso/free-disk-space@main
|
|
||||||
with:
|
|
||||||
tool-cache: false
|
|
||||||
docker-images: false
|
|
||||||
android: true
|
|
||||||
dotnet: true
|
|
||||||
haskell: true
|
|
||||||
large-packages: true
|
|
||||||
swap-storage: false
|
|
||||||
|
|
||||||
- name: Login to Docker Hub
|
|
||||||
uses: docker/login-action@v2
|
|
||||||
with:
|
|
||||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
|
||||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
|
||||||
|
|
||||||
- name: Build and Push DeepEP Image
|
|
||||||
run: |
|
|
||||||
docker build . -f docker/Dockerfile.dev-deepep -t lmsysorg/sglang:dev-deepep --no-cache
|
|
||||||
docker push lmsysorg/sglang:dev-deepep
|
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
FROM lmsysorg/sglang:latest
|
ARG BASE_IMAGE
|
||||||
|
FROM ${BASE_IMAGE}
|
||||||
|
|
||||||
# CMake
|
# CMake
|
||||||
RUN apt-get update \
|
RUN apt-get update \
|
||||||
@@ -55,6 +56,9 @@ RUN tar -xf nvshmem_src_3.2.5-1.txz \
|
|||||||
WORKDIR /sgl-workspace/nvshmem
|
WORKDIR /sgl-workspace/nvshmem
|
||||||
RUN git apply /sgl-workspace/DeepEP/third-party/nvshmem.patch
|
RUN git apply /sgl-workspace/DeepEP/third-party/nvshmem.patch
|
||||||
|
|
||||||
|
RUN sed -i '1i#include <unistd.h>' /sgl-workspace/nvshmem/examples/moe_shuffle.cu && \
|
||||||
|
cat /sgl-workspace/nvshmem/examples/moe_shuffle.cu
|
||||||
|
|
||||||
WORKDIR /sgl-workspace/nvshmem
|
WORKDIR /sgl-workspace/nvshmem
|
||||||
ENV CUDA_HOME=/usr/local/cuda
|
ENV CUDA_HOME=/usr/local/cuda
|
||||||
RUN NVSHMEM_SHMEM_SUPPORT=0 \
|
RUN NVSHMEM_SHMEM_SUPPORT=0 \
|
||||||
@@ -71,7 +75,7 @@ RUN NVSHMEM_SHMEM_SUPPORT=0 \
|
|||||||
|
|
||||||
WORKDIR /sgl-workspace/DeepEP
|
WORKDIR /sgl-workspace/DeepEP
|
||||||
ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install
|
ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install
|
||||||
RUN NVSHMEM_DIR=/sgl-workspace/nvshmem/install pip install .
|
RUN NVSHMEM_DIR=/sgl-workspace/nvshmem/install pip install --break-system-packages .
|
||||||
|
|
||||||
# Set workspace
|
# Set workspace
|
||||||
WORKDIR /sgl-workspace
|
WORKDIR /sgl-workspace
|
||||||
|
|||||||
@@ -224,5 +224,8 @@ setopt HIST_FIND_NO_DUPS
|
|||||||
setopt INC_APPEND_HISTORY
|
setopt INC_APPEND_HISTORY
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
|
RUN set -euxo ; \
|
||||||
|
curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to /usr/local/bin
|
||||||
|
|
||||||
# Set workspace directory
|
# Set workspace directory
|
||||||
WORKDIR /sgl-workspace/sglang
|
WORKDIR /sgl-workspace/sglang
|
||||||
|
|||||||
@@ -1,80 +0,0 @@
|
|||||||
FROM lmsysorg/sglang:dev
|
|
||||||
|
|
||||||
# CMake
|
|
||||||
RUN apt-get update \
|
|
||||||
&& apt-get install -y --no-install-recommends \
|
|
||||||
build-essential \
|
|
||||||
wget \
|
|
||||||
libssl-dev \
|
|
||||||
&& wget https://github.com/Kitware/CMake/releases/download/v3.27.4/cmake-3.27.4-linux-x86_64.sh \
|
|
||||||
&& chmod +x cmake-3.27.4-linux-x86_64.sh \
|
|
||||||
&& ./cmake-3.27.4-linux-x86_64.sh --skip-license --prefix=/usr/local \
|
|
||||||
&& rm cmake-3.27.4-linux-x86_64.sh
|
|
||||||
|
|
||||||
# Python
|
|
||||||
RUN apt-get update \
|
|
||||||
&& apt-get install -y --no-install-recommends \
|
|
||||||
python3 \
|
|
||||||
python3-pip \
|
|
||||||
&& ln -s /usr/bin/python3 /usr/bin/python
|
|
||||||
|
|
||||||
# GDRCopy
|
|
||||||
WORKDIR /tmp
|
|
||||||
RUN git clone https://github.com/NVIDIA/gdrcopy.git
|
|
||||||
WORKDIR /tmp/gdrcopy
|
|
||||||
RUN git checkout v2.4.4
|
|
||||||
|
|
||||||
RUN apt update
|
|
||||||
RUN apt install -y nvidia-dkms-535
|
|
||||||
RUN apt install -y build-essential devscripts debhelper fakeroot pkg-config dkms
|
|
||||||
RUN apt install -y check libsubunit0 libsubunit-dev
|
|
||||||
|
|
||||||
WORKDIR /tmp/gdrcopy/packages
|
|
||||||
RUN CUDA=/usr/local/cuda ./build-deb-packages.sh
|
|
||||||
RUN dpkg -i gdrdrv-dkms_*.deb
|
|
||||||
RUN dpkg -i libgdrapi_*.deb
|
|
||||||
RUN dpkg -i gdrcopy-tests_*.deb
|
|
||||||
RUN dpkg -i gdrcopy_*.deb
|
|
||||||
|
|
||||||
ENV GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
|
|
||||||
|
|
||||||
# IBGDA dependency
|
|
||||||
RUN ln -s /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
|
|
||||||
RUN apt-get install -y libfabric-dev
|
|
||||||
|
|
||||||
# DeepEP
|
|
||||||
WORKDIR /sgl-workspace
|
|
||||||
RUN git clone https://github.com/deepseek-ai/DeepEP.git
|
|
||||||
|
|
||||||
# NVSHMEM
|
|
||||||
WORKDIR /sgl-workspace
|
|
||||||
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.2.5/source/nvshmem_src_3.2.5-1.txz
|
|
||||||
RUN tar -xf nvshmem_src_3.2.5-1.txz \
|
|
||||||
&& mv nvshmem_src nvshmem
|
|
||||||
|
|
||||||
WORKDIR /sgl-workspace/nvshmem
|
|
||||||
RUN git apply /sgl-workspace/DeepEP/third-party/nvshmem.patch
|
|
||||||
|
|
||||||
WORKDIR /sgl-workspace/nvshmem
|
|
||||||
ENV CUDA_HOME=/usr/local/cuda
|
|
||||||
RUN NVSHMEM_SHMEM_SUPPORT=0 \
|
|
||||||
NVSHMEM_UCX_SUPPORT=0 \
|
|
||||||
NVSHMEM_USE_NCCL=0 \
|
|
||||||
NVSHMEM_MPI_SUPPORT=0 \
|
|
||||||
NVSHMEM_IBGDA_SUPPORT=1 \
|
|
||||||
NVSHMEM_PMIX_SUPPORT=0 \
|
|
||||||
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
|
|
||||||
NVSHMEM_USE_GDRCOPY=1 \
|
|
||||||
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/sgl-workspace/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
|
|
||||||
&& cd build \
|
|
||||||
&& make install -j
|
|
||||||
|
|
||||||
WORKDIR /sgl-workspace/DeepEP
|
|
||||||
ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install
|
|
||||||
RUN NVSHMEM_DIR=/sgl-workspace/nvshmem/install pip install .
|
|
||||||
|
|
||||||
RUN set -euxo ; \
|
|
||||||
curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to /usr/local/bin
|
|
||||||
|
|
||||||
# Set workspace
|
|
||||||
WORKDIR /sgl-workspace
|
|
||||||
Reference in New Issue
Block a user