diff --git a/.github/workflows/release-docker-blackwell.yml b/.github/workflows/release-docker-blackwell.yml deleted file mode 100644 index 2911e7115..000000000 --- a/.github/workflows/release-docker-blackwell.yml +++ /dev/null @@ -1,36 +0,0 @@ -name: Build Blackwell Docker Image - -on: - workflow_dispatch: - schedule: - - cron: '0 0 * * *' - -jobs: - build-dev: - if: ${{ github.repository == 'sgl-project/sglang' }} - runs-on: ubuntu-22.04 - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Free disk space - uses: jlumbroso/free-disk-space@main - with: - tool-cache: false - docker-images: false - android: true - dotnet: true - haskell: true - large-packages: true - swap-storage: false - - - name: Login to Docker Hub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Build and Push Blackwell Image - run: | - docker buildx build --output type=image,compression=zstd . -f docker/Dockerfile.blackwell -t lmsysorg/sglang:blackwell --no-cache - docker push lmsysorg/sglang:blackwell diff --git a/.github/workflows/release-docker-deepep.yml b/.github/workflows/release-docker-deepep.yml deleted file mode 100644 index 25992a280..000000000 --- a/.github/workflows/release-docker-deepep.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: Build DeepEP Docker Image - -on: - workflow_dispatch: - schedule: - - cron: '0 0 * * *' - -jobs: - build-dev: - if: ${{ github.repository == 'sgl-project/sglang' }} - runs-on: ubuntu-22.04 - - strategy: - matrix: - variant: - - base: lmsysorg/sglang:latest - tag: deepep - - base: lmsysorg/sglang:dev - tag: dev-deepep - - base: lmsysorg/sglang:blackwell - tag: blackwell-deepep - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Free disk space - uses: jlumbroso/free-disk-space@main - with: - tool-cache: false - docker-images: false - android: true - dotnet: true - haskell: true - large-packages: true - swap-storage: 
false - - - name: Login to Docker Hub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Build and Push Docker Image - run: | - docker build . -f docker/Dockerfile.deepep --build-arg BASE_IMAGE=${{ matrix.variant.base }} -t lmsysorg/sglang:${{ matrix.variant.tag }} --no-cache - docker push lmsysorg/sglang:${{ matrix.variant.tag }} diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index b8e2ac503..5ebbed6c3 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -14,8 +14,13 @@ jobs: environment: 'prod' strategy: matrix: - cuda_version: ['12.4.1'] - build_type: ['all'] + cuda_version: ['12.6.1', '12.8.1'] + build_type: ['all', 'blackwell'] + exclude: + - cuda_version: '12.6.1' + build_type: 'blackwell' + - cuda_version: '12.8.1' + build_type: 'all' steps: - name: Delete huge unnecessary tools folder run: rm -rf /opt/hostedtoolcache @@ -41,6 +46,10 @@ jobs: cuda_tag="cu124" elif [ "${{ matrix.cuda_version }}" = "12.5.1" ]; then cuda_tag="cu125" + elif [ "${{ matrix.cuda_version }}" = "12.6.1" ]; then + cuda_tag="cu126" + elif [ "${{ matrix.cuda_version }}" = "12.8.1" ]; then + cuda_tag="cu128" else echo "Unsupported CUDA version" exit 1 @@ -52,15 +61,17 @@ jobs: tag_suffix="" elif [ "${{ matrix.build_type }}" = "srt" ]; then tag_suffix="-srt" + elif [ "${{ matrix.build_type }}" = "blackwell" ]; then + tag_suffix="-b200" else echo "Unsupported build type" exit 1 fi - docker build . -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.cuda_version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache + docker buildx build --output type=image,compression=zstd . 
-f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.cuda_version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache docker push lmsysorg/sglang:${tag}${tag_suffix} - if [ "${{ matrix.cuda_version }}" = "12.4.1" ]; then + if [ "${{ matrix.cuda_version }}" = "12.6.1" ]; then docker tag lmsysorg/sglang:${tag}${tag_suffix} lmsysorg/sglang:latest${tag_suffix} docker push lmsysorg/sglang:latest${tag_suffix} fi diff --git a/docker/Dockerfile b/docker/Dockerfile index c8b207600..0d403a8ce 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,51 +1,98 @@ -ARG CUDA_VERSION=12.4.1 - -FROM nvcr.io/nvidia/tritonserver:24.12-py3-min +ARG CUDA_VERSION=12.6.1 +FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 ARG BUILD_TYPE=all -ENV DEBIAN_FRONTEND=noninteractive +ENV DEBIAN_FRONTEND=noninteractive \ + CUDA_HOME=/usr/local/cuda \ + GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \ + NVSHMEM_DIR=/sgl-workspace/nvshmem/install +# Set timezone and install all packages RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ - && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \ - && apt update -y \ - && apt install software-properties-common -y \ - && apt install python3 python3-pip -y \ - && apt install curl git sudo libibverbs-dev -y \ - && apt install rdma-core infiniband-diags openssh-server perftest -y \ - && python3 --version \ - && python3 -m pip --version \ - && rm -rf /var/lib/apt/lists/* \ - && apt clean + && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \ + && apt-get update && apt-get install -y --no-install-recommends \ + tzdata \ + software-properties-common netcat-openbsd kmod unzip openssh-server \ + curl wget lsof zsh ccache tmux htop git-lfs tree \ + python3 python3-pip python3-dev libpython3-dev \ + build-essential cmake \ + libopenmpi-dev libnuma1 libnuma-dev \ + libibverbs-dev libibverbs1 libibumad3 \ + librdmacm1 libnl-3-200 
libnl-route-3-200 libnl-route-3-dev libnl-3-dev \ + ibverbs-providers infiniband-diags perftest \ + libgoogle-glog-dev libgtest-dev libjsoncpp-dev libunwind-dev \ + libboost-all-dev libssl-dev \ + libgrpc-dev libgrpc++-dev libprotobuf-dev protobuf-compiler-grpc \ + pybind11-dev \ + libhiredis-dev libcurl4-openssl-dev \ + libczmq4 libczmq-dev \ + libfabric-dev \ + patchelf \ + nvidia-dkms-550 \ + devscripts debhelper fakeroot dkms check libsubunit0 libsubunit-dev \ + && ln -sf /usr/bin/python3 /usr/bin/python \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get clean -# For openbmb/MiniCPM models -RUN pip3 install datamodel_code_generator --break-system-packages +# GDRCopy installation +RUN mkdir -p /tmp/gdrcopy && cd /tmp \ + && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \ + && cd gdrcopy/packages \ + && CUDA=/usr/local/cuda ./build-deb-packages.sh \ + && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \ + && cd / && rm -rf /tmp/gdrcopy +# Fix DeepEP IBGDA symlink +RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so + +# Clone and install SGLang WORKDIR /sgl-workspace +RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5lib six \ + && git clone --depth=1 https://github.com/sgl-project/sglang.git \ + && cd sglang \ + && case "$CUDA_VERSION" in \ + 12.6.1) CUINDEX=126 ;; \ + 12.8.1) CUINDEX=128 ;; \ + *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \ + esac \ + && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \ + && if [ "$CUDA_VERSION" = "12.8.1" ]; then \ + python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps ; \ + python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.1.9/sgl_kernel-0.1.9+cu128-cp39-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ; \ + fi -ARG CUDA_VERSION -RUN 
python3 -m pip install --upgrade pip setuptools wheel html5lib six --break-system-packages --ignore-installed \ - && git clone --depth=1 https://github.com/sgl-project/sglang.git \ - && if [ "$CUDA_VERSION" = "12.1.1" ]; then \ - export CUINDEX=121; \ - elif [ "$CUDA_VERSION" = "12.4.1" ]; then \ - export CUINDEX=124; \ - elif [ "$CUDA_VERSION" = "12.8.1" ]; then \ - export CUINDEX=124; \ - elif [ "$CUDA_VERSION" = "11.8.0" ]; then \ - export CUINDEX=118; \ - python3 -m pip install --no-cache-dir sgl-kernel -i https://docs.sglang.ai/whl/cu118 --break-system-packages; \ - else \ - echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1; \ - fi \ - && if [ "$CUDA_VERSION" = "12.4.1" ]; then \ - python3 -m pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cu126 --break-system-packages; \ - else \ - python3 -m pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cu${CUINDEX} --break-system-packages; \ - fi \ - && cd sglang \ - && python3 -m pip --no-cache-dir install -e "python[${BUILD_TYPE}]" --break-system-packages \ - && if [ "$CUDA_VERSION" = "12.8.1" ]; then \ - python3 -m pip install nvidia-nccl-cu12==2.26.2.post1 --force-reinstall --no-deps --break-system-packages; \ - fi +# Build and install NVSHMEM + DeepEP +RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.2.5/source/nvshmem_src_3.2.5-1.txz \ + && git clone https://github.com/deepseek-ai/DeepEP.git \ + && tar -xf nvshmem_src_3.2.5-1.txz && mv nvshmem_src nvshmem \ + && cd nvshmem \ + && git apply /sgl-workspace/DeepEP/third-party/nvshmem.patch \ + && sed -i '1i#include <unistd.h>' examples/moe_shuffle.cu \ + && rm -f /sgl-workspace/nvshmem_src_3.2.5-1.txz \ + && NVSHMEM_SHMEM_SUPPORT=0 \ + NVSHMEM_UCX_SUPPORT=0 \ + NVSHMEM_USE_NCCL=0 \ + NVSHMEM_MPI_SUPPORT=0 \ + NVSHMEM_IBGDA_SUPPORT=1 \ + NVSHMEM_PMIX_SUPPORT=0 \ + NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \ + NVSHMEM_USE_GDRCOPY=1 \ + cmake -S . 
-B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES=90 \ + && cmake --build build --target install -j \ + && cd /sgl-workspace/DeepEP \ + && NVSHMEM_DIR=${NVSHMEM_DIR} pip install . + +# Python tools +RUN python3 -m pip install --no-cache-dir \ + datamodel_code_generator \ + mooncake_transfer_engine==0.3.3.post2 \ + pre-commit \ + pytest \ + black \ + isort \ + icdiff \ + uv \ + wheel \ + scikit-build-core ENV DEBIAN_FRONTEND=interactive diff --git a/docker/Dockerfile.blackwell b/docker/Dockerfile.blackwell deleted file mode 100644 index de3d6d034..000000000 --- a/docker/Dockerfile.blackwell +++ /dev/null @@ -1,215 +0,0 @@ -FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04 - -ENV DEBIAN_FRONTEND=noninteractive - -WORKDIR /sgl-workspace - -RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ - && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \ - && apt update -y \ - && apt install software-properties-common -y \ - && apt install python3 python3-pip -y \ - && apt install curl git sudo libibverbs-dev -y \ - && apt install rdma-core infiniband-diags openssh-server perftest libnuma1 -y \ - && apt install lsof zsh ccache tmux htop git-lfs tree unzip -y \ - && python3 --version \ - && python3 -m pip --version \ - && pip3 install --upgrade pip \ - && rm -rf /var/lib/apt/lists/* \ - && apt clean - -RUN pip3 install https://github.com/sgl-project/whl/releases/download/v0.1.9/sgl_kernel-0.1.9+cu128-cp39-abi3-manylinux2014_x86_64.whl \ - && pip3 install setuptools==75.0.0 wheel scikit-build-core - -RUN git clone --depth=1 https://github.com/sgl-project/sglang.git \ - && cd sglang && pip3 install -e "python[blackwell]" --extra-index-url https://download.pytorch.org/whl/cu128 - -RUN pip3 install nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps - -ENV DEBIAN_FRONTEND=interactive - -# Install minimal Python packages -RUN pip3 install --no-cache-dir \ - pytest \ - black \ - isort \ - icdiff \ - uv \ - 
pre-commit - -# Install diff-so-fancy -RUN curl -LSso /usr/local/bin/diff-so-fancy https://github.com/so-fancy/diff-so-fancy/releases/download/v1.4.4/diff-so-fancy \ - && chmod +x /usr/local/bin/diff-so-fancy - -# Install clang-format -RUN curl -LSso /usr/local/bin/clang-format https://github.com/muttleyxd/clang-tools-static-binaries/releases/download/master-32d3ac78/clang-format-16_linux-amd64 \ - && chmod +x /usr/local/bin/clang-format - -# Install clangd -RUN curl -L https://github.com/clangd/clangd/releases/download/18.1.3/clangd-linux-18.1.3.zip -o clangd.zip \ - && unzip clangd.zip \ - && cp -r clangd_18.1.3/bin/* /usr/local/bin/ \ - && cp -r clangd_18.1.3/lib/* /usr/local/lib/ \ - && rm -rf clangd_18.1.3 clangd.zip - -# Install CMake -RUN wget https://github.com/Kitware/CMake/releases/download/v3.31.1/cmake-3.31.1-linux-x86_64.tar.gz \ - && tar -xzf cmake-3.31.1-linux-x86_64.tar.gz \ - && cp -r cmake-3.31.1-linux-x86_64/bin/* /usr/local/bin/ \ - && cp -r cmake-3.31.1-linux-x86_64/share/* /usr/local/share/ \ - && rm -rf cmake-3.31.1-linux-x86_64 cmake-3.31.1-linux-x86_64.tar.gz - -# Add yank script -COPY --chown=root:root <<-"EOF" /usr/local/bin/yank -#!/bin/bash -put() { - esc=$1 - test -n "$TMUX" -o -z "${TERM##screen*}" && esc="\033Ptmux;\033$esc\033\\" - printf "$esc" -} -put "\033]52;c;!\a" -buf=$( cat "$@" ) -len=$( printf %s "$buf" | wc -c ) max=74994 -test $len -gt $max && echo "$0: input is $(( len - max )) bytes too long" >&2 -put "\033]52;c;$( printf %s "$buf" | head -c $max | base64 | tr -d '\r\n' )\a" -test -n "$TMUX" && tmux set-buffer "$buf" ||: -EOF - -RUN chmod +x /usr/local/bin/yank - -# Install oh-my-zsh and plugins -RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended \ - && git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions \ - && git clone https://github.com/zsh-users/zsh-syntax-highlighting.git 
${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-syntax-highlighting - -# Configure Vim -COPY --chown=root:root <<-"EOF" /root/.vimrc -function! Yank(text) abort - let escape = system('yank', a:text) - if v:shell_error - echoerr escape - else - call writefile([escape], '/dev/tty', 'b') - endif -endfunction - -noremap y y:call Yank(@0) - -" automatically run yank(1) whenever yanking in Vim -function! CopyYank() abort - call Yank(join(v:event.regcontents, "\n")) -endfunction - -autocmd TextYankPost * call CopyYank() - -" Basic settings -set number -syntax on -set mouse=a -filetype indent on - -" Indentation -set autoindent nosmartindent -set smarttab -set expandtab -set shiftwidth=4 -set softtabstop=4 - -" Visual guides -set colorcolumn=120 -highlight ColorColumn ctermbg=5 - -" Status line -set laststatus=2 -set statusline=%<%f\ %h%m%r%=%{\"[\".(&fenc==\"\"?&enc:&fenc).((exists(\"+bomb\")\ &&\ &bomb)?\",B\":\"\").\"]\ \"}%k\ %-14.(%l,%c%V%)\ %P - -" Backspace behavior -set backspace=2 - -" Encoding -set encoding=utf-8 -set fileencoding=utf-8 -EOF - -# Configure tmux -COPY --chown=root:root <<-"EOF" /root/.tmux.conf -# Pane border styling -set -g pane-border-style fg='#742727',bg=black -set -g pane-active-border-style fg=red,bg=black - -# Status bar styling -set -g status-style bg='#0C8A92',fg=black - -# Change prefix key to backtick -set-option -g prefix ` -unbind C-b -bind-key ` send-prefix - -# Split panes using - and = with current path -unbind '"' -bind - splitw -v -c '#{pane_current_path}' -unbind '%' -bind = splitw -h -c '#{pane_current_path}' - -# Vi mode settings -bind-key -T copy-mode-vi Y send-keys -X copy-pipe 'yank > #{pane_tty}' -set-window-option -g mode-keys vi - -# Other settings -set-option -g escape-time 0 -set-option -g base-index 1 -set-window-option -g mouse on -EOF - -# Configure Git -RUN git config --global core.editor "vim" \ - && git config --global core.whitespace "fix,-indent-with-non-tab,trailing-space,cr-at-eol" \ - && git config --global 
core.pager "diff-so-fancy | less --tabs=4 -RFX" \ - && git config --global color.ui true \ - && git config --global color."diff-highlight".oldNormal "red bold" \ - && git config --global color."diff-highlight".oldHighlight "red bold 52" \ - && git config --global color."diff-highlight".newNormal "green bold" \ - && git config --global color."diff-highlight".newHighlight "green bold 22" \ - && git config --global color.diff.meta "11" \ - && git config --global color.diff.frag "magenta bold" \ - && git config --global color.diff.commit "yellow bold" \ - && git config --global color.diff.old "red bold" \ - && git config --global color.diff.new "green bold" \ - && git config --global color.diff.whitespace "red reverse" \ - && git config --global alias.lg "log --color --graph --pretty=format:'%Cred%h%Creset - %s %Cgreen(%cr) %C(bold blue)<%an>%Creset%C(auto)%d%Creset' --abbrev-commit --" \ - && git config --global http.sslVerify false \ - && git config --global pull.rebase true - -# Configure zsh -COPY --chown=root:root <<-"EOF" /root/.zshrc -export ZSH="/root/.oh-my-zsh" - -# Theme -ZSH_THEME="robbyrussell" - -# Plugins -plugins=( - git - z - zsh-autosuggestions - zsh-syntax-highlighting -) - -source $ZSH/oh-my-zsh.sh - -# Aliases -alias ll='ls -alF' -alias la='ls -A' -alias l='ls -CF' -alias vi='vim' - -# Enhanced history -HISTSIZE=10000 -SAVEHIST=10000 -setopt HIST_IGNORE_ALL_DUPS -setopt HIST_FIND_NO_DUPS -setopt INC_APPEND_HISTORY -EOF - -# Set workspace directory -WORKDIR /sgl-workspace/sglang diff --git a/docker/Dockerfile.deepep b/docker/Dockerfile.deepep deleted file mode 100644 index 8ddb6a459..000000000 --- a/docker/Dockerfile.deepep +++ /dev/null @@ -1,114 +0,0 @@ -ARG BASE_IMAGE -FROM ${BASE_IMAGE} -# Deps -RUN apt-get update && apt-get install -y netcat-openbsd \ - libopenmpi-dev \ - kmod \ - rdma-core \ - infiniband-diags \ - openssh-server \ - perftest \ - ibverbs-providers \ - libibumad3 \ - libibverbs1 \ - libnl-3-200 \ - libnl-route-3-200 \ - 
librdmacm1 \ - build-essential \ - cmake \ - libibverbs-dev \ - libgoogle-glog-dev \ - libgtest-dev \ - libjsoncpp-dev \ - libnuma-dev \ - libibverbs-dev \ - libunwind-dev \ - libgoogle-glog-dev \ - libpython3-dev \ - libboost-all-dev \ - libssl-dev \ - libgrpc-dev \ - libgrpc++-dev \ - libprotobuf-dev \ - protobuf-compiler-grpc \ - pybind11-dev \ - libhiredis-dev \ - pkg-config \ - patchelf \ - ccache \ - libcurl4-openssl-dev \ - curl \ - pkg-config libczmq4 libczmq-dev \ - libnl-route-3-dev libnl-3-dev librdmacm1 \ - libhiredis-dev \ - nvidia-dkms-535 \ - build-essential \ - devscripts \ - debhelper \ - fakeroot \ - dkms \ - check \ - libsubunit0 \ - libsubunit-dev \ - libfabric-dev \ - python3 \ - python3-pip \ - && rm -rf /var/lib/apt/lists/* \ - && ln -s /usr/bin/python3 /usr/bin/python - -# CMake -RUN wget https://github.com/Kitware/CMake/releases/download/v3.27.4/cmake-3.27.4-linux-x86_64.sh \ -&& chmod +x cmake-3.27.4-linux-x86_64.sh \ -&& ./cmake-3.27.4-linux-x86_64.sh --skip-license --prefix=/usr/local \ -&& rm cmake-3.27.4-linux-x86_64.sh - - -ENV GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ -# GDRCopy -RUN mkdir -p /tmp \ - && cd /tmp \ - && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \ - && cd /tmp/gdrcopy/packages \ - && CUDA=/usr/local/cuda ./build-deb-packages.sh \ - && dpkg -i gdrdrv-dkms_*.deb \ - && dpkg -i libgdrapi_*.deb \ - && dpkg -i gdrcopy-tests_*.deb \ - && dpkg -i gdrcopy_*.deb - - -# IBGDA dependency -RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so - -# DeepEP -WORKDIR /sgl-workspace -RUN git clone https://github.com/deepseek-ai/DeepEP.git - -# NVSHMEM -RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.2.5/source/nvshmem_src_3.2.5-1.txz -RUN tar -xf nvshmem_src_3.2.5-1.txz \ - && mv nvshmem_src nvshmem \ - && cd /sgl-workspace/nvshmem \ - && git apply /sgl-workspace/DeepEP/third-party/nvshmem.patch \ - && sed -i '1i#include <unistd.h>' /sgl-workspace/nvshmem/examples/moe_shuffle.cu 
\ - && cat /sgl-workspace/nvshmem/examples/moe_shuffle.cu - -# Compile NVSHMEM -ENV CUDA_HOME=/usr/local/cuda -RUN cd /sgl-workspace/nvshmem && NVSHMEM_SHMEM_SUPPORT=0 \ - NVSHMEM_UCX_SUPPORT=0 \ - NVSHMEM_USE_NCCL=0 \ - NVSHMEM_MPI_SUPPORT=0 \ - NVSHMEM_IBGDA_SUPPORT=1 \ - NVSHMEM_PMIX_SUPPORT=0 \ - NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \ - NVSHMEM_USE_GDRCOPY=1 \ - cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/sgl-workspace/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \ - && cd build \ - && make install -j - -WORKDIR /sgl-workspace/DeepEP -ENV NVSHMEM_DIR=/sgl-workspace/nvshmem/install -RUN NVSHMEM_DIR=/sgl-workspace/nvshmem/install pip install --break-system-packages . - -# Install mooncake transfer engine -RUN pip install --upgrade mooncake_transfer_engine --break-system-packages