adapt to sglang v0.5.2rc1 on dcu
This commit is contained in:
378
docker/Dockerfile
Normal file
378
docker/Dockerfile
Normal file
@@ -0,0 +1,378 @@
|
||||
# CUDA toolkit version; selects the tag of the nvidia/cuda base image below.
ARG CUDA_VERSION=12.6.1
# Keyword casing is kept uniform (FROM ... AS) so BuildKit's FromAsCasing check
# does not warn and the stage declaration matches the other stages in this file.
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 AS base
|
||||
|
||||
# Build-time knobs (override with --build-arg):
#   BRANCH_TYPE                 "local" copies the build context, anything else clones sglang
#   BUILD_TYPE                  extras name used in `pip install -e "python[...]"`
#   CMAKE_BUILD_PARALLEL_LEVEL  -j level for the NVSHMEM build
#   DEEPEP_COMMIT               DeepEP revision that gets checked out
ARG BRANCH_TYPE=remote
ARG BUILD_TYPE=all
ARG CMAKE_BUILD_PARALLEL_LEVEL=2
ARG DEEPEP_COMMIT=b92d0d4860ce6866cd6d31bfbae937f9a7a3772b
|
||||
# Tool locations used by the GDRCopy / NVSHMEM / DeepEP steps, plus a
# non-interactive apt frontend.
ENV CUDA_HOME=/usr/local/cuda \
    DEBIAN_FRONTEND=noninteractive \
    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
    NVSHMEM_DIR=/sgl-workspace/nvshmem/install

# Add GKE default lib and bin locations.
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64" \
    PATH="${PATH}:/usr/local/nvidia/bin"
|
||||
|
||||
# Install Python 3.12 from the deadsnakes PPA, make it the default python3 via
# update-alternatives (3.10 stays registered at lower priority), and bootstrap pip.
# Uses apt-get (stable scripting CLI) and removes the apt lists and the
# get-pip.py bootstrap script in the same layer so they do not persist in the image.
RUN apt-get update \
    && apt-get install -y wget software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa -y \
    && apt-get update \
    && apt-get install -y python3.12-full python3.12-dev python3.10-venv \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 2 \
    && update-alternatives --set python3 /usr/bin/python3.12 \
    && wget -O /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
    && python3 /tmp/get-pip.py \
    && rm -f /tmp/get-pip.py \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Preseed tzdata (America/Los_Angeles) so it installs without prompting, then
# install the system packages in one layer and point /usr/bin/python at 3.12.
RUN printf '%s\n' \
        'tzdata tzdata/Areas select America' \
        'tzdata tzdata/Zones/America select Los_Angeles' \
      | debconf-set-selections \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        tzdata \
        software-properties-common netcat-openbsd kmod unzip openssh-server \
        curl wget lsof zsh ccache tmux htop git-lfs tree \
        build-essential cmake \
        libopenmpi-dev libnuma1 libnuma-dev \
        libibverbs-dev libibverbs1 libibumad3 \
        librdmacm1 libnl-3-200 libnl-route-3-200 libnl-route-3-dev libnl-3-dev \
        ibverbs-providers infiniband-diags perftest \
        libgoogle-glog-dev libgtest-dev libjsoncpp-dev libunwind-dev \
        libboost-all-dev libssl-dev \
        libgrpc-dev libgrpc++-dev libprotobuf-dev protobuf-compiler-grpc \
        pybind11-dev \
        libhiredis-dev libcurl4-openssl-dev \
        libczmq4 libczmq-dev \
        libfabric-dev \
        patchelf \
        nvidia-dkms-550 \
        devscripts debhelper fakeroot dkms check libsubunit0 libsubunit-dev \
    && ln -sf /usr/bin/python3.12 /usr/bin/python \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# GDRCopy: build the v2.4.4 Debian packages from source, install them, and
# remove the checkout in the same layer.
RUN git clone --branch v2.4.4 https://github.com/NVIDIA/gdrcopy.git /tmp/gdrcopy \
    && cd /tmp/gdrcopy/packages \
    && CUDA=/usr/local/cuda ./build-deb-packages.sh \
    && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
    && cd / \
    && rm -rf /tmp/gdrcopy
|
||||
|
||||
# Fix DeepEP IBGDA symlink: expose libmlx5.so.1 under the unversioned name.
# The multiarch directory is derived from `uname -m` instead of hard-coding
# x86_64, matching docker/Dockerfile.gb200; on x86_64 the paths are unchanged.
RUN MULTIARCH_DIR="/usr/lib/$(uname -m)-linux-gnu" \
    && ln -sf "${MULTIARCH_DIR}/libmlx5.so.1" "${MULTIARCH_DIR}/libmlx5.so"
|
||||
|
||||
# Helper stage that only carries the build context, so the main stage can pull
# it in with COPY --from.
FROM scratch AS local_src
COPY . /src

FROM base AS build-image
# Install SGLang
WORKDIR /sgl-workspace
ARG BRANCH_TYPE
COPY --from=local_src /src /tmp/local_src
# BRANCH_TYPE=local uses the copied build context as the sglang checkout;
# any other value does a shallow clone from GitHub. The staging copy is
# removed either way.
RUN case "$BRANCH_TYPE" in \
        local) cp -r /tmp/local_src /sgl-workspace/sglang ;; \
        *)     git clone --depth=1 https://github.com/sgl-project/sglang.git /sgl-workspace/sglang ;; \
    esac \
    && rm -rf /tmp/local_src
|
||||
# Install sglang in editable mode against the PyTorch wheel index matching the
# CUDA version, force NCCL 2.27.6, download the flashinfer cubins, and for
# CUDA 12.8.1 / 12.9.1 replace sgl_kernel with the matching prebuilt wheel.
# NOTE(review): CUDA_VERSION is not redeclared as an ARG in this stage;
# presumably it comes from the base image environment — confirm.
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5lib six \
    && cd sglang \
    && case "$CUDA_VERSION" in \
         12.6.1) CUINDEX=126 ;; \
         12.8.1) CUINDEX=128 ;; \
         12.9.1) CUINDEX=129 ;; \
         *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
       esac \
    && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
    && python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps \
    && python3 -m flashinfer --download-cubin \
    && case "$CUDA_VERSION" in \
         12.8.1) \
           python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.8/sgl_kernel-0.3.8+cu128-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ;; \
         12.9.1) \
           python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.8/sgl_kernel-0.3.8+cu129-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ;; \
       esac
|
||||
|
||||
# Download source files: the NVSHMEM 3.3.9 tarball (unpacked to ./nvshmem) and
# DeepEP at DEEPEP_COMMIT, with NUM_CPU_TIMEOUT_SECS raised from 100 to 1000.
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
    && git clone https://github.com/deepseek-ai/DeepEP.git \
    && git -C DeepEP checkout ${DEEPEP_COMMIT} \
    && sed -i 's/#define NUM_CPU_TIMEOUT_SECS 100/#define NUM_CPU_TIMEOUT_SECS 1000/' DeepEP/csrc/kernels/configs.cuh \
    && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
    && mv nvshmem_src nvshmem \
    && rm -f /sgl-workspace/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
|
||||
|
||||
# Build and install NVSHMEM into NVSHMEM_DIR for CUDA architecture 90.
# The NVSHMEM_* feature switches are passed to the configure step only.
RUN cd /sgl-workspace/nvshmem \
    && env \
        NVSHMEM_IBGDA_SUPPORT=1 \
        NVSHMEM_MPI_SUPPORT=0 \
        NVSHMEM_PMIX_SUPPORT=0 \
        NVSHMEM_SHMEM_SUPPORT=0 \
        NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
        NVSHMEM_UCX_SUPPORT=0 \
        NVSHMEM_USE_GDRCOPY=1 \
        NVSHMEM_USE_NCCL=0 \
        cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="90" \
    && cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL}
|
||||
|
||||
# Install DeepEP against the NVSHMEM build; the torch arch list depends on
# the CUDA version (9.0 for 12.6.1, 9.0 and 10.0 for 12.8.1 / 12.9.1).
RUN cd /sgl-workspace/DeepEP \
    && case "$CUDA_VERSION" in \
         12.6.1)        CHOSEN_TORCH_CUDA_ARCH_LIST='9.0' ;; \
         12.8.1|12.9.1) CHOSEN_TORCH_CUDA_ARCH_LIST='9.0;10.0' ;; \
         *)             echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
       esac \
    && NVSHMEM_DIR=${NVSHMEM_DIR} TORCH_CUDA_ARCH_LIST="${CHOSEN_TORCH_CUDA_ARCH_LIST}" pip install .
|
||||
|
||||
# Python tools (developer utilities plus the mooncake and nixl packages).
RUN python3 -m pip install --no-cache-dir \
        black \
        datamodel_code_generator \
        icdiff \
        isort \
        mooncake-transfer-engine==0.3.5 \
        nixl \
        pre-commit \
        py-spy \
        pytest \
        scikit-build-core \
        uv \
        wheel
|
||||
|
||||
# Install development tools and utilities plus the RDMA userspace packages.
# Everything goes through apt-get (the original mixed in `apt`, whose CLI is
# not meant for scripts) in a single install; the package set is unchanged.
RUN apt-get update \
    && apt-get install -y \
        bear \
        ccache \
        cloc \
        curl \
        gdb \
        git \
        git-lfs \
        htop \
        ibverbs-providers \
        infiniband-diags \
        less \
        libibumad3 \
        libibverbs1 \
        libnl-3-200 \
        libnl-route-3-200 \
        librdmacm1 \
        libssl-dev \
        locales \
        lsof \
        ninja-build \
        openssh-server \
        perftest \
        pkg-config \
        rdma-core \
        silversearcher-ag \
        tmux \
        tree \
        unzip \
        vim \
        wget \
        zsh \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean
|
||||
|
||||
# Install the Nsight Systems CLI from NVIDIA's devtools apt repository.
# The repository and its signing key are fetched over https (the original used
# plain http for both), apt-get replaces the script-unfriendly `apt`, and the
# apt lists are removed in the same layer.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends gnupg \
    && echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64 /" | tee /etc/apt/sources.list.d/nvidia-devtools.list \
    && apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub \
    && apt-get update -y \
    && apt-get install -y nsight-systems-cli \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set up locale: generate en_US.UTF-8 and make it the default.
# ENV uses the key=value form; the space-separated form is deprecated
# (BuildKit LegacyKeyValueFormat check).
RUN locale-gen en_US.UTF-8
ENV LANG=en_US.UTF-8 \
    LANGUAGE=en_US:en \
    LC_ALL=en_US.UTF-8
|
||||
|
||||
# Install minimal Python packages (formatters, test tooling, plotting helpers).
RUN python3 -m pip install --no-cache-dir --break-system-packages \
        black \
        icdiff \
        isort \
        matplotlib \
        pandas \
        pre-commit \
        pytest \
        scikit_build_core \
        tabulate \
        uv
|
||||
|
||||
# Install diff-so-fancy v1.4.4.
# -f makes curl fail on an HTTP error instead of saving the error page as the
# "binary", so a bad download breaks the build rather than the image.
RUN curl -fLSso /usr/local/bin/diff-so-fancy https://github.com/so-fancy/diff-so-fancy/releases/download/v1.4.4/diff-so-fancy \
    && chmod +x /usr/local/bin/diff-so-fancy
|
||||
|
||||
# Install clang-format 16 (static linux-amd64 binary).
# -f makes curl fail on an HTTP error instead of saving the error page as the
# executable.
RUN curl -fLSso /usr/local/bin/clang-format https://github.com/muttleyxd/clang-tools-static-binaries/releases/download/master-32d3ac78/clang-format-16_linux-amd64 \
    && chmod +x /usr/local/bin/clang-format
|
||||
|
||||
# Install clangd 18.1.3: unpack the release zip, copy bin/ and lib/ into
# /usr/local, and remove the archive and extracted tree in the same layer.
RUN CLANGD_VERSION=18.1.3 \
    && curl -L "https://github.com/clangd/clangd/releases/download/${CLANGD_VERSION}/clangd-linux-${CLANGD_VERSION}.zip" -o clangd.zip \
    && unzip clangd.zip \
    && cp -r "clangd_${CLANGD_VERSION}/bin/"* /usr/local/bin/ \
    && cp -r "clangd_${CLANGD_VERSION}/lib/"* /usr/local/lib/ \
    && rm -rf "clangd_${CLANGD_VERSION}" clangd.zip
|
||||
|
||||
# Install CMake 3.31.1 from the upstream x86_64 release tarball into /usr/local.
RUN CMAKE_PKG=cmake-3.31.1-linux-x86_64 \
    && wget "https://github.com/Kitware/CMake/releases/download/v3.31.1/${CMAKE_PKG}.tar.gz" \
    && tar -xzf "${CMAKE_PKG}.tar.gz" \
    && cp -r "${CMAKE_PKG}/bin/"* /usr/local/bin/ \
    && cp -r "${CMAKE_PKG}/share/"* /usr/local/share/ \
    && rm -rf "${CMAKE_PKG}" "${CMAKE_PKG}.tar.gz"
|
||||
|
||||
# Add yank script: copies stdin/files to the terminal clipboard via the OSC 52
# escape sequence (wrapped for tmux/screen) and also sets the tmux buffer.
# --chmod=755 sets the executable bit at copy time, replacing the separate
# `RUN chmod +x` layer.
COPY --chown=root:root --chmod=755 <<-"EOF" /usr/local/bin/yank
#!/bin/bash
put() {
  esc=$1
  test -n "$TMUX" -o -z "${TERM##screen*}" && esc="\033Ptmux;\033$esc\033\\"
  printf "$esc"
}
put "\033]52;c;!\a"
buf=$( cat "$@" )
len=$( printf %s "$buf" | wc -c ) max=74994
test $len -gt $max && echo "$0: input is $(( len - max )) bytes too long" >&2
put "\033]52;c;$( printf %s "$buf" | head -c $max | base64 | tr -d '\r\n' )\a"
test -n "$TMUX" && tmux set-buffer "$buf" ||:
EOF
|
||||
|
||||
# Install oh-my-zsh (unattended) and clone the two zsh-users plugins that the
# zshrc written later in this file enables.
RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended \
    && git clone https://github.com/zsh-users/zsh-autosuggestions \
         ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions \
    && git clone https://github.com/zsh-users/zsh-syntax-highlighting.git \
         ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-syntax-highlighting
|
||||
|
||||
# Configure Vim: root's .vimrc pipes every yank through the `yank` helper and
# sets editing defaults (4-space indent, line numbers, status line, UTF-8).
COPY --chown=root:root <<-"EOF" /root/.vimrc
function! Yank(text) abort
  let escape = system('yank', a:text)
  if v:shell_error
    echoerr escape
  else
    call writefile([escape], '/dev/tty', 'b')
  endif
endfunction

noremap <silent> <Leader>y y:<C-U>call Yank(@0)<CR>

" automatically run yank(1) whenever yanking in Vim
function! CopyYank() abort
  call Yank(join(v:event.regcontents, "\n"))
endfunction

autocmd TextYankPost * call CopyYank()

" Basic settings
set number
syntax on
set mouse=a
filetype indent on

" Indentation
set autoindent nosmartindent
set smarttab
set expandtab
set shiftwidth=4
set softtabstop=4

" Visual guides
set colorcolumn=120
highlight ColorColumn ctermbg=5

" Status line
set laststatus=2
set statusline=%<%f\ %h%m%r%=%{\"[\".(&fenc==\"\"?&enc:&fenc).((exists(\"+bomb\")\ &&\ &bomb)?\",B\":\"\").\"]\ \"}%k\ %-14.(%l,%c%V%)\ %P

" Backspace behavior
set backspace=2

" Encoding
set encoding=utf-8
set fileencoding=utf-8
EOF
|
||||
|
||||
# Configure tmux: backtick prefix, -/= splits in the current path, vi copy
# mode with Y bound to the `yank` helper, mouse support, and a large history.
COPY --chown=root:root <<-"EOF" /root/.tmux.conf
# Pane border styling
set -g pane-border-style fg='#742727',bg=black
set -g pane-active-border-style fg=red,bg=black

# Status bar styling
set -g status-style bg='#0C8A92',fg=black

# Change prefix key to backtick
set-option -g prefix `
unbind C-b
bind-key ` send-prefix

# Split panes using - and = with current path
unbind '"'
bind - splitw -v -c '#{pane_current_path}'
unbind '%'
bind = splitw -h -c '#{pane_current_path}'

# Vi mode settings
bind-key -T copy-mode-vi Y send-keys -X copy-pipe 'yank > #{pane_tty}'
set-window-option -g mode-keys vi

# Other settings
set-option -g escape-time 0
set-option -g base-index 1
set-window-option -g mouse on
set -g history-limit 100000
EOF
|
||||
|
||||
# Configure Git: editor, whitespace rules, diff-so-fancy pager, colors, the
# `lg` alias, and rebase-on-pull.
# `http.sslVerify false` was removed: it turned off TLS certificate
# verification for every git operation in the image, so any clone or fetch
# could be intercepted. If a self-signed remote is needed, configure
# http.<url>.sslCAInfo for that remote instead.
RUN git config --global core.editor "vim" \
    && git config --global core.whitespace "fix,-indent-with-non-tab,trailing-space,cr-at-eol" \
    && git config --global core.pager "diff-so-fancy | less --tabs=4 -RFX" \
    && git config --global color.ui true \
    && git config --global color."diff-highlight".oldNormal "red bold" \
    && git config --global color."diff-highlight".oldHighlight "red bold 52" \
    && git config --global color."diff-highlight".newNormal "green bold" \
    && git config --global color."diff-highlight".newHighlight "green bold 22" \
    && git config --global color.diff.meta "11" \
    && git config --global color.diff.frag "magenta bold" \
    && git config --global color.diff.commit "yellow bold" \
    && git config --global color.diff.old "red bold" \
    && git config --global color.diff.new "green bold" \
    && git config --global color.diff.whitespace "red reverse" \
    && git config --global alias.lg "log --color --graph --pretty=format:'%Cred%h%Creset - %s %Cgreen(%cr) %C(bold blue)<%an>%Creset%C(auto)%d%Creset' --abbrev-commit --" \
    && git config --global pull.rebase true
|
||||
|
||||
# Configure zsh: root's .zshrc loads oh-my-zsh with the robbyrussell theme and
# four plugins, then adds ls/vim aliases and de-duplicated history settings.
COPY --chown=root:root <<-"EOF" /root/.zshrc
export ZSH="/root/.oh-my-zsh"

# Theme
ZSH_THEME="robbyrussell"

# Plugins
plugins=(
    git
    z
    zsh-autosuggestions
    zsh-syntax-highlighting
)

source $ZSH/oh-my-zsh.sh

# Aliases
alias ll='ls -alF'
alias la='ls -A'
alias l='ls -CF'
alias vi='vim'

# Enhanced history
HISTSIZE=10000
SAVEHIST=10000
setopt HIST_IGNORE_ALL_DUPS
setopt HIST_FIND_NO_DUPS
setopt INC_APPEND_HISTORY
EOF
|
||||
|
||||
# Install `just` into /usr/local/bin via its upstream installer.
# The original `set -euxo ;` named no option after -o, so pipefail was never
# enabled and a failed curl was masked by the downstream bash exiting 0.
# Running the pipeline under bash with -o pipefail makes a download failure
# fail the build.
RUN /bin/bash -euxo pipefail -c \
    "curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to /usr/local/bin"

# Set workspace directory
WORKDIR /sgl-workspace/sglang
|
||||
351
docker/Dockerfile.gb200
Normal file
351
docker/Dockerfile.gb200
Normal file
@@ -0,0 +1,351 @@
|
||||
# CUDA toolkit version; selects the tag of the nvidia/cuda base image.
ARG CUDA_VERSION=12.9.1
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04

# Build-time knobs (override with --build-arg):
#   BUILD_TYPE                  extras name used in `pip install -e "python[...]"`
#   DEEPEP_COMMIT               DeepEP revision that gets checked out
#   CMAKE_BUILD_PARALLEL_LEVEL  -j level for the NVSHMEM build
ARG BUILD_TYPE=blackwell
ARG DEEPEP_COMMIT=1b14ad661c7640137fcfe93cccb2694ede1220b0
ARG CMAKE_BUILD_PARALLEL_LEVEL=2

# BUILD_TYPE is also exported into the image environment.
ENV BUILD_TYPE=${BUILD_TYPE} \
    CUDA_HOME=/usr/local/cuda \
    DEBIAN_FRONTEND=noninteractive \
    GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
    NVSHMEM_DIR=/sgl-workspace/nvshmem/install \
    TORCH_CUDA_ARCH_LIST="10.0 12.0"
|
||||
|
||||
# Preseed tzdata (America/Los_Angeles) so it installs without prompting, then
# install the system packages in one layer and point /usr/bin/python at python3.
RUN printf '%s\n' \
        'tzdata tzdata/Areas select America' \
        'tzdata tzdata/Zones/America select Los_Angeles' \
      | debconf-set-selections \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        tzdata \
        software-properties-common netcat-openbsd kmod unzip openssh-server \
        curl wget lsof zsh ccache tmux htop git-lfs tree \
        python3 python3-pip python3-dev libpython3-dev python3-venv \
        build-essential cmake \
        libopenmpi-dev libnuma1 libnuma-dev \
        libibverbs-dev libibverbs1 libibumad3 \
        librdmacm1 libnl-3-200 libnl-route-3-200 libnl-route-3-dev libnl-3-dev \
        ibverbs-providers infiniband-diags perftest \
        libgoogle-glog-dev libgtest-dev libjsoncpp-dev libunwind-dev \
        libboost-all-dev libssl-dev \
        libgrpc-dev libgrpc++-dev libprotobuf-dev protobuf-compiler-grpc \
        pybind11-dev \
        libhiredis-dev libcurl4-openssl-dev \
        libczmq4 libczmq-dev \
        libfabric-dev \
        patchelf \
        nvidia-dkms-550 \
        devscripts debhelper fakeroot dkms check libsubunit0 libsubunit-dev \
    && ln -sf /usr/bin/python3 /usr/bin/python \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install SGLang missing package for blackwell build type.
# --no-cache-dir keeps pip's download cache out of the layer, matching every
# other pip invocation in this file.
RUN python3 -m pip install --no-cache-dir openai httpx
|
||||
|
||||
# GDRCopy: build the v2.4.4 Debian packages from source, install them, and
# remove the checkout in the same layer.
RUN git clone --branch v2.4.4 https://github.com/NVIDIA/gdrcopy.git /tmp/gdrcopy \
    && cd /tmp/gdrcopy/packages \
    && CUDA=/usr/local/cuda ./build-deb-packages.sh \
    && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
    && cd / \
    && rm -rf /tmp/gdrcopy
|
||||
|
||||
# Fix DeepEP IBGDA symlink: expose libmlx5.so.1 under the unversioned name in
# the multiarch library directory of the build machine's architecture.
RUN MULTIARCH_DIR="/usr/lib/$(uname -m)-linux-gnu" \
    && ln -sf "${MULTIARCH_DIR}/libmlx5.so.1" "${MULTIARCH_DIR}/libmlx5.so"
|
||||
|
||||
# Clone and install SGLang (editable) against the cu129 PyTorch index, then
# force NCCL 2.27.6 and the prebuilt sgl_kernel wheel for this architecture.
# The two forced installs are chained with && (the original used `;`, so a
# failed NCCL install was ignored and the build continued).
# NOTE(review): CUDA_VERSION is not redeclared as an ARG after FROM;
# presumably it comes from the base image environment — confirm.
WORKDIR /sgl-workspace
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5lib six \
    && git clone --depth 1 https://github.com/sgl-project/sglang.git \
    && cd sglang \
    && case "$CUDA_VERSION" in \
         12.9.1) CUINDEX=129 ;; \
         *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
       esac \
    && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
    && if [ "$CUDA_VERSION" = "12.9.1" ]; then \
         python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps \
         && python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.3.4/sgl_kernel-0.3.4+cu129-cp310-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps ; \
       fi
|
||||
|
||||
# Download source files: the NVSHMEM 3.3.9 tarball (unpacked to ./nvshmem) and
# the fzyzcjy/DeepEP fork checked out at DEEPEP_COMMIT.
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
    && git clone https://github.com/fzyzcjy/DeepEP.git \
    && git -C DeepEP checkout ${DEEPEP_COMMIT} \
    && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
    && mv nvshmem_src nvshmem \
    && rm -f /sgl-workspace/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
|
||||
|
||||
# Build and install NVSHMEM into NVSHMEM_DIR for CUDA architectures 100 and 120.
# The NVSHMEM_* feature switches are passed to the configure step only.
RUN cd /sgl-workspace/nvshmem \
    && env \
        NVSHMEM_IBGDA_SUPPORT=1 \
        NVSHMEM_MPI_SUPPORT=0 \
        NVSHMEM_PMIX_SUPPORT=0 \
        NVSHMEM_SHMEM_SUPPORT=0 \
        NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
        NVSHMEM_UCX_SUPPORT=0 \
        NVSHMEM_USE_GDRCOPY=1 \
        NVSHMEM_USE_NCCL=0 \
        cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="100;120" \
    && cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL}

# Install DeepEP against the NVSHMEM build.
RUN cd /sgl-workspace/DeepEP \
    && NVSHMEM_DIR=${NVSHMEM_DIR} pip install .
|
||||
|
||||
# Python tools (developer utilities plus the mooncake package).
RUN python3 -m pip install --no-cache-dir \
        black \
        datamodel_code_generator \
        icdiff \
        isort \
        mooncake-transfer-engine==0.3.5 \
        pre-commit \
        pytest \
        scikit-build-core \
        uv \
        wheel

# These will be automatically installed by future versions of flashinfer after 0.2.9rc2
RUN python3 -m pip install --no-cache-dir nvidia-cudnn-cu12 nvidia-cudnn-frontend

# Install nixl kv transfer backend
RUN python3 -m pip install --no-cache-dir nixl
|
||||
|
||||
# Install development tools and utilities plus the RDMA userspace packages.
# Everything goes through apt-get (the original mixed in `apt`, whose CLI is
# not meant for scripts) in a single install; the package set is unchanged.
RUN apt-get update \
    && apt-get install -y \
        bear \
        ccache \
        cloc \
        curl \
        gdb \
        git \
        git-lfs \
        htop \
        ibverbs-providers \
        infiniband-diags \
        less \
        libibumad3 \
        libibverbs1 \
        libnl-3-200 \
        libnl-route-3-200 \
        librdmacm1 \
        libssl-dev \
        locales \
        lsof \
        ninja-build \
        openssh-server \
        perftest \
        pkg-config \
        rdma-core \
        silversearcher-ag \
        tmux \
        tree \
        unzip \
        vim \
        wget \
        zsh \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean
|
||||
|
||||
# Install the Nsight Systems CLI from NVIDIA's devtools apt repository.
# The repository / key path components are chosen once from `uname -m`
# (aarch64 -> arm64/arm64, otherwise amd64/x86_64, as in the original).
# Both URLs use https (the original used plain http), apt-get replaces the
# script-unfriendly `apt`, and the apt lists are removed in the same layer.
RUN case "$(uname -m)" in \
        aarch64) DEVTOOLS_ARCH=arm64; CUDA_REPO_ARCH=arm64 ;; \
        *)       DEVTOOLS_ARCH=amd64; CUDA_REPO_ARCH=x86_64 ;; \
    esac \
    && apt-get update -y \
    && apt-get install -y --no-install-recommends gnupg \
    && echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/${DEVTOOLS_ARCH} /" | tee /etc/apt/sources.list.d/nvidia-devtools.list \
    && apt-key adv --fetch-keys "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/${CUDA_REPO_ARCH}/7fa2af80.pub" \
    && apt-get update -y \
    && apt-get install -y nsight-systems-cli \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set up locale: generate en_US.UTF-8 and make it the default.
RUN locale-gen en_US.UTF-8
ENV LANG=en_US.UTF-8 \
    LANGUAGE=en_US:en \
    LC_ALL=en_US.UTF-8
|
||||
|
||||
# Install minimal Python packages (formatters, test tooling, plotting helpers).
RUN python3 -m pip install --no-cache-dir --break-system-packages \
        black \
        icdiff \
        isort \
        matplotlib \
        pandas \
        pre-commit \
        pytest \
        scikit_build_core \
        tabulate \
        uv
|
||||
|
||||
# Install diff-so-fancy v1.4.4.
# -f makes curl fail on an HTTP error instead of saving the error page as the
# "binary", so a bad download breaks the build rather than the image.
RUN curl -fLSso /usr/local/bin/diff-so-fancy https://github.com/so-fancy/diff-so-fancy/releases/download/v1.4.4/diff-so-fancy \
    && chmod +x /usr/local/bin/diff-so-fancy
|
||||
|
||||
# Install clang-format 16.
# The download is a linux-amd64 static binary, while the rest of this file
# selects artifacts by `uname -m`; on any other architecture the original
# installed an executable that cannot run. Install it only on x86_64 and say
# so otherwise. -f makes curl fail on HTTP errors instead of saving the error
# page as the executable.
RUN if [ "$(uname -m)" = "x86_64" ]; then \
        curl -fLSso /usr/local/bin/clang-format https://github.com/muttleyxd/clang-tools-static-binaries/releases/download/master-32d3ac78/clang-format-16_linux-amd64 \
        && chmod +x /usr/local/bin/clang-format ; \
    else \
        echo "Skipping clang-format: the pinned release is a linux-amd64 binary" >&2 ; \
    fi
|
||||
|
||||
# Install clangd 18.1.3: unpack the release zip, copy bin/ and lib/ into
# /usr/local, and remove the archive and extracted tree in the same layer.
# NOTE(review): unlike the CMake step, this download is not selected by
# `uname -m` — confirm the archive matches the target architecture.
RUN CLANGD_VERSION=18.1.3 \
    && curl -L "https://github.com/clangd/clangd/releases/download/${CLANGD_VERSION}/clangd-linux-${CLANGD_VERSION}.zip" -o clangd.zip \
    && unzip clangd.zip \
    && cp -r "clangd_${CLANGD_VERSION}/bin/"* /usr/local/bin/ \
    && cp -r "clangd_${CLANGD_VERSION}/lib/"* /usr/local/lib/ \
    && rm -rf "clangd_${CLANGD_VERSION}" clangd.zip
|
||||
|
||||
# Install CMake 3.31.1 from the upstream release tarball for the build
# machine's architecture (taken from `uname -m`) into /usr/local.
RUN CMAKE_VERSION=3.31.1 \
    && CMAKE_PKG="cmake-${CMAKE_VERSION}-linux-$(uname -m)" \
    && wget "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_PKG}.tar.gz" \
    && tar -xzf "${CMAKE_PKG}.tar.gz" \
    && cp -r "${CMAKE_PKG}/bin/"* /usr/local/bin/ \
    && cp -r "${CMAKE_PKG}/share/"* /usr/local/share/ \
    && rm -rf "${CMAKE_PKG}" "${CMAKE_PKG}.tar.gz"
|
||||
|
||||
# Add yank script: copies stdin/files to the terminal clipboard via the OSC 52
# escape sequence (wrapped for tmux/screen) and also sets the tmux buffer.
# --chmod=755 sets the executable bit at copy time, replacing the separate
# `RUN chmod +x` layer.
COPY --chown=root:root --chmod=755 <<-"EOF" /usr/local/bin/yank
#!/bin/bash
put() {
  esc=$1
  test -n "$TMUX" -o -z "${TERM##screen*}" && esc="\033Ptmux;\033$esc\033\\"
  printf "$esc"
}
put "\033]52;c;!\a"
buf=$( cat "$@" )
len=$( printf %s "$buf" | wc -c ) max=74994
test $len -gt $max && echo "$0: input is $(( len - max )) bytes too long" >&2
put "\033]52;c;$( printf %s "$buf" | head -c $max | base64 | tr -d '\r\n' )\a"
test -n "$TMUX" && tmux set-buffer "$buf" ||:
EOF
|
||||
|
||||
# Install oh-my-zsh (unattended) and clone the two zsh-users plugins that the
# zshrc written later in this file enables.
RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended \
    && git clone https://github.com/zsh-users/zsh-autosuggestions \
         ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions \
    && git clone https://github.com/zsh-users/zsh-syntax-highlighting.git \
         ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-syntax-highlighting
|
||||
|
||||
# Configure Vim: root's .vimrc pipes every yank through the `yank` helper and
# sets editing defaults (4-space indent, line numbers, status line, UTF-8).
COPY --chown=root:root <<-"EOF" /root/.vimrc
function! Yank(text) abort
  let escape = system('yank', a:text)
  if v:shell_error
    echoerr escape
  else
    call writefile([escape], '/dev/tty', 'b')
  endif
endfunction

noremap <silent> <Leader>y y:<C-U>call Yank(@0)<CR>

" automatically run yank(1) whenever yanking in Vim
function! CopyYank() abort
  call Yank(join(v:event.regcontents, "\n"))
endfunction

autocmd TextYankPost * call CopyYank()

" Basic settings
set number
syntax on
set mouse=a
filetype indent on

" Indentation
set autoindent nosmartindent
set smarttab
set expandtab
set shiftwidth=4
set softtabstop=4

" Visual guides
set colorcolumn=120
highlight ColorColumn ctermbg=5

" Status line
set laststatus=2
set statusline=%<%f\ %h%m%r%=%{\"[\".(&fenc==\"\"?&enc:&fenc).((exists(\"+bomb\")\ &&\ &bomb)?\",B\":\"\").\"]\ \"}%k\ %-14.(%l,%c%V%)\ %P

" Backspace behavior
set backspace=2

" Encoding
set encoding=utf-8
set fileencoding=utf-8
EOF
|
||||
|
||||
# Configure tmux: backtick prefix, -/= splits in the current path, vi copy
# mode with Y bound to the `yank` helper, and mouse support.
COPY --chown=root:root <<-"EOF" /root/.tmux.conf
# Pane border styling
set -g pane-border-style fg='#742727',bg=black
set -g pane-active-border-style fg=red,bg=black

# Status bar styling
set -g status-style bg='#0C8A92',fg=black

# Change prefix key to backtick
set-option -g prefix `
unbind C-b
bind-key ` send-prefix

# Split panes using - and = with current path
unbind '"'
bind - splitw -v -c '#{pane_current_path}'
unbind '%'
bind = splitw -h -c '#{pane_current_path}'

# Vi mode settings
bind-key -T copy-mode-vi Y send-keys -X copy-pipe 'yank > #{pane_tty}'
set-window-option -g mode-keys vi

# Other settings
set-option -g escape-time 0
set-option -g base-index 1
set-window-option -g mouse on
EOF
|
||||
|
||||
# Configure Git: editor, whitespace rules, diff-so-fancy pager, colors, the
# `lg` alias, and rebase-on-pull.
# `http.sslVerify false` was removed: it turned off TLS certificate
# verification for every git operation in the image, so any clone or fetch
# could be intercepted. If a self-signed remote is needed, configure
# http.<url>.sslCAInfo for that remote instead.
RUN git config --global core.editor "vim" \
    && git config --global core.whitespace "fix,-indent-with-non-tab,trailing-space,cr-at-eol" \
    && git config --global core.pager "diff-so-fancy | less --tabs=4 -RFX" \
    && git config --global color.ui true \
    && git config --global color."diff-highlight".oldNormal "red bold" \
    && git config --global color."diff-highlight".oldHighlight "red bold 52" \
    && git config --global color."diff-highlight".newNormal "green bold" \
    && git config --global color."diff-highlight".newHighlight "green bold 22" \
    && git config --global color.diff.meta "11" \
    && git config --global color.diff.frag "magenta bold" \
    && git config --global color.diff.commit "yellow bold" \
    && git config --global color.diff.old "red bold" \
    && git config --global color.diff.new "green bold" \
    && git config --global color.diff.whitespace "red reverse" \
    && git config --global alias.lg "log --color --graph --pretty=format:'%Cred%h%Creset - %s %Cgreen(%cr) %C(bold blue)<%an>%Creset%C(auto)%d%Creset' --abbrev-commit --" \
    && git config --global pull.rebase true
|
||||
|
||||
# Configure zsh: root's .zshrc loads oh-my-zsh with the robbyrussell theme and
# four plugins, then adds ls/vim aliases and de-duplicated history settings.
COPY --chown=root:root <<-"EOF" /root/.zshrc
export ZSH="/root/.oh-my-zsh"

# Theme
ZSH_THEME="robbyrussell"

# Plugins
plugins=(
    git
    z
    zsh-autosuggestions
    zsh-syntax-highlighting
)

source $ZSH/oh-my-zsh.sh

# Aliases
alias ll='ls -alF'
alias la='ls -A'
alias l='ls -CF'
alias vi='vim'

# Enhanced history
HISTSIZE=10000
SAVEHIST=10000
setopt HIST_IGNORE_ALL_DUPS
setopt HIST_FIND_NO_DUPS
setopt INC_APPEND_HISTORY
EOF
|
||||
|
||||
# Install `just` into /usr/local/bin via its upstream installer.
# The original `set -euxo ;` named no option after -o, so pipefail was never
# enabled and a failed curl was masked by the downstream bash exiting 0.
# Running the pipeline under bash with -o pipefail makes a download failure
# fail the build.
RUN /bin/bash -euxo pipefail -c \
    "curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to /usr/local/bin"

# Set workspace directory
WORKDIR /sgl-workspace/sglang
|
||||
80
docker/Dockerfile.npu
Normal file
80
docker/Dockerfile.npu
Normal file
@@ -0,0 +1,80 @@
|
||||
# Components of the Ascend CANN base image tag:
#   <CANN_VERSION>-<DEVICE_TYPE>-<OS>-<PYTHON_VERSION>
ARG CANN_VERSION=8.2.rc1
ARG DEVICE_TYPE=a3
ARG OS=ubuntu22.04
ARG PYTHON_VERSION=py3.11

FROM quay.io/ascend/cann:${CANN_VERSION}-${DEVICE_TYPE}-${OS}-${PYTHON_VERSION}
|
||||
|
||||
# Package index / mirror overrides (APTMIRROR is only applied when non-empty).
ARG PIP_INDEX_URL="https://pypi.org/simple/"
ARG APTMIRROR=""

# Pinned versions, tags and wheel locations consumed by the install steps below.
ARG MEMFABRIC_URL=https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/mf_adapter-1.0.0-cp311-cp311-linux_aarch64.whl
ARG PYTORCH_VERSION=2.6.0
ARG TORCHVISION_VERSION=0.21.0
ARG PTA_URL="https://gitee.com/ascend/pytorch/releases/download/v7.1.0.1-pytorch2.6.0/torch_npu-2.6.0.post1-cp311-cp311-manylinux_2_28_aarch64.whl"
ARG VLLM_TAG=v0.8.5
ARG TRITON_ASCEND_URL=https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/triton_ascend-3.2.0.dev20250729-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl
ARG SGLANG_TAG=main
ARG ASCEND_CANN_PATH=/usr/local/Ascend/ascend-toolkit
ARG SGLANG_KERNEL_NPU_TAG=main
|
||||
|
||||
WORKDIR /workspace

# Define environments
ENV DEBIAN_FRONTEND=noninteractive

# Point pip at the configured index and, when APTMIRROR is non-empty, rewrite
# the ubuntu.com hosts in the apt sources to that mirror.
RUN pip config set global.index-url $PIP_INDEX_URL
RUN [ -z "$APTMIRROR" ] || sed -i "s|.*.ubuntu.com|$APTMIRROR|g" /etc/apt/sources.list
|
||||
|
||||
# Install development tools and utilities, refresh the CA bundle, and generate
# the en_US.UTF-8 locale. The upgrade step uses apt-get like the rest of the
# command (the original mixed in `apt`, whose CLI is not meant for scripts).
RUN apt-get update -y \
    && apt-get upgrade -y \
    && apt-get install -y \
        build-essential \
        ca-certificates \
        ccache \
        clang \
        cmake \
        curl \
        lld \
        locales \
        net-tools \
        vim \
        wget \
        zlib1g-dev \
    && rm -rf /var/cache/apt/* \
    && rm -rf /var/lib/apt/lists/* \
    && update-ca-certificates \
    && locale-gen en_US.UTF-8

ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US:en
ENV LC_ALL=en_US.UTF-8
|
||||
|
||||
# Install dependencies
# TODO: install from pypi released memfabric
RUN pip install --no-cache-dir $MEMFABRIC_URL

# Install vLLM from a shallow clone of VLLM_TAG with VLLM_TARGET_DEVICE="empty",
# then drop the checkout.
RUN git clone --depth 1 --branch $VLLM_TAG https://github.com/vllm-project/vllm.git \
    && (cd vllm && VLLM_TARGET_DEVICE="empty" pip install -v . --no-cache-dir) \
    && rm -rf vllm
|
||||
|
||||
# TODO: install from pypi released triton-ascend
# Install CPU torch/torchvision, the torch_npu wheel from PTA_URL, pinned
# helper packages, and the triton-ascend wheel.
# The wheel filename is derived from PTA_URL (the original hard-coded it, so
# overriding PTA_URL broke the install) and the downloaded wheel is deleted in
# the same layer.
RUN pip install torch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION --index-url https://download.pytorch.org/whl/cpu --no-cache-dir \
    && PTA_WHEEL="$(basename "${PTA_URL}")" \
    && wget -O "${PTA_WHEEL}" "${PTA_URL}" \
    && pip install "./${PTA_WHEEL}" --no-cache-dir \
    && rm -f "${PTA_WHEEL}" \
    && python3 -m pip install --no-cache-dir attrs==24.2.0 numpy==1.26.4 scipy==1.13.1 decorator==5.1.1 psutil==6.0.0 pytest==8.3.2 pytest-xdist==3.6.1 pyyaml pybind11 \
    && pip install ${TRITON_ASCEND_URL} --no-cache-dir
|
||||
|
||||
# Install SGLang with the srt_npu extra from SGLANG_TAG, then drop the checkout.
# The requirement is quoted so the shell cannot treat `.[srt_npu]` as a glob
# pattern and expand it against files in sglang/python.
RUN git clone https://github.com/sgl-project/sglang --branch $SGLANG_TAG \
    && (cd sglang/python && pip install -v ".[srt_npu]" --no-cache-dir) \
    && rm -rf sglang
|
||||
|
||||
# Install Deep-ep: build sgl-kernel-npu with the CANN environment loaded,
# install the resulting deep_ep wheel, remove the checkout, and link the
# deep_ep_cpp shared object into the package's install location.
# The environment script is loaded with POSIX `.` instead of `source`, which
# is not available when RUN executes under a plain /bin/sh; `.` behaves the
# same under bash.
RUN git clone --branch $SGLANG_KERNEL_NPU_TAG https://github.com/sgl-project/sgl-kernel-npu.git \
    && export LD_LIBRARY_PATH=${ASCEND_CANN_PATH}/latest/runtime/lib64/stub:$LD_LIBRARY_PATH \
    && . ${ASCEND_CANN_PATH}/set_env.sh \
    && cd sgl-kernel-npu \
    && bash build.sh \
    && pip install output/deep_ep*.whl --no-cache-dir \
    && cd .. && rm -rf sgl-kernel-npu \
    && cd "$(pip show deep-ep | awk '/^Location:/ {print $2}')" && ln -s deep_ep/deep_ep_cpp*.so

CMD ["/bin/bash"]
|
||||
213
docker/Dockerfile.rocm
Normal file
213
docker/Dockerfile.rocm
Normal file
@@ -0,0 +1,213 @@
|
||||
# Usage (to build SGLang ROCm docker image):
|
||||
# docker build --build-arg SGL_BRANCH=v0.5.2rc1 --build-arg GPU_ARCH=gfx942 -t v0.5.2rc1-rocm630-mi30x -f Dockerfile.rocm .
|
||||
# docker build --build-arg SGL_BRANCH=v0.5.2rc1 --build-arg GPU_ARCH=gfx942-rocm700 -t v0.5.2rc1-rocm700-mi30x -f Dockerfile.rocm .
|
||||
# docker build --build-arg SGL_BRANCH=v0.5.2rc1 --build-arg GPU_ARCH=gfx950 -t v0.5.2rc1-rocm700-mi35x -f Dockerfile.rocm .
|
||||
|
||||
|
||||
# Default base images
ARG BASE_IMAGE_942=rocm/sgl-dev:vllm20250114
ARG BASE_IMAGE_942_ROCM700=rocm/sgl-dev:rocm7-vllm-20250821
ARG BASE_IMAGE_950=rocm/sgl-dev:rocm7-vllm-20250821

# Declared before the first FROM so that the later `FROM ${GPU_ARCH}` can resolve it.
ARG GPU_ARCH=gfx950
|
||||
|
||||
# ===============================
# Base image 942 with rocm630 and args
# Build switches consumed by the RUN steps of the final stage.
FROM $BASE_IMAGE_942 AS gfx942
ENV BUILD_VLLM="0" \
    BUILD_TRITON="1" \
    BUILD_LLVM="0" \
    BUILD_AITER_ALL="1" \
    BUILD_MOONCAKE="1" \
    AITER_COMMIT="v0.1.4" \
    NO_DEPS_FLAG=""
|
||||
|
||||
# ===============================
# Base image 942 (ROCm 7.0.0 variant) and args
# Build switches consumed by the RUN steps of the final stage.
FROM $BASE_IMAGE_942_ROCM700 AS gfx942-rocm700
ENV BUILD_VLLM="0" \
    BUILD_TRITON="0" \
    BUILD_LLVM="0" \
    BUILD_AITER_ALL="1" \
    BUILD_MOONCAKE="1" \
    AITER_COMMIT="v0.1.5" \
    NO_DEPS_FLAG=""
|
||||
|
||||
# ===============================
# Base image 950 and args
# Build switches consumed by the RUN steps of the final stage.
FROM $BASE_IMAGE_950 AS gfx950
ENV BUILD_VLLM="0" \
    BUILD_TRITON="0" \
    BUILD_LLVM="0" \
    BUILD_AITER_ALL="1" \
    BUILD_MOONCAKE="1" \
    AITER_COMMIT="v0.1.5" \
    NO_DEPS_FLAG="--no-deps"
|
||||
|
||||
# ===============================
# Chosen arch and args
# The final stage starts from whichever of the stages above is named by GPU_ARCH.
FROM ${GPU_ARCH}

# This is necessary for scope purpose, again
ARG GPU_ARCH=gfx950
# GPU_ARCH with its shortest trailing "-suffix" removed, e.g. gfx942-rocm700 -> gfx942.
ENV GPU_ARCH_LIST=${GPU_ARCH%-*}

# SGLang source; SGL_BRANCH falls back to SGL_DEFAULT when not given.
ARG SGL_REPO=https://github.com/sgl-project/sglang.git
ARG SGL_DEFAULT=main
ARG SGL_BRANCH=${SGL_DEFAULT}

ARG TRITON_REPO=https://github.com/ROCm/triton.git
ARG TRITON_COMMIT=improve_fa_decode_3.0.0

ARG AITER_REPO=https://github.com/ROCm/aiter.git

ARG LLVM_REPO=https://github.com/jrbyrnes/llvm-project.git
ARG LLVM_BRANCH=MainOpSelV2
ARG LLVM_COMMIT=6520ace8227ffe2728148d5f3b9872a870b0a560

ARG MOONCAKE_REPO=https://github.com/kvcache-ai/Mooncake.git
ARG MOONCAKE_COMMIT=dcdf1c784b40aa6975a8ed89fe26321b028e40e8
|
||||
|
||||
USER root

# Install some basic utilities
RUN python -m pip install --upgrade pip \
    && pip install setuptools_scm
# Best-effort removal of sccache: the steps are joined with ';' so a failing step
# does not prevent the following ones from running.
RUN apt-get purge -y sccache; \
    python -m pip uninstall -y sccache; \
    rm -f "$(which sccache)"

WORKDIR /sgl-workspace
|
||||
|
||||
# -----------------------
# llvm
# Optionally build a custom LLVM/clang toolchain (only when BUILD_LLVM=1).
# `ENV` is a Dockerfile instruction and cannot appear inside a shell command: in the
# original, the shell tried to execute a program named "ENV" and the whole step failed
# whenever BUILD_LLVM=1. HIP_CLANG_PATH is now exported as a shell variable for this
# RUN only; the AITER step sets it inline for its own build.
RUN if [ "$BUILD_LLVM" = "1" ]; then \
        export HIP_CLANG_PATH="/sgl-workspace/llvm-project/build/bin/" \
        && git clone --single-branch ${LLVM_REPO} -b ${LLVM_BRANCH} \
        && cd llvm-project \
        && git checkout ${LLVM_COMMIT} \
        && mkdir build \
        && cd build \
        && cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=1 -DLLVM_TARGETS_TO_BUILD="AMDGPU;X86" -DLLVM_ENABLE_PROJECTS="clang;lld;" -DLLVM_ENABLE_RUNTIMES="compiler-rt" ../llvm \
        && make -j$(nproc); \
    fi
|
||||
|
||||
# -----------------------
|
||||
|
||||
# -----------------------
# AITER
# Remove any AITER already present in the base image, then build from source at AITER_COMMIT.
RUN pip uninstall -y aiter
RUN git clone ${AITER_REPO} \
    && cd aiter \
    && git checkout ${AITER_COMMIT} \
    && git submodule update --init --recursive
# BUILD_AITER_ALL=1 enables PREBUILD_KERNELS=1; when the custom LLVM was built too,
# the build is additionally pointed at it through HIP_CLANG_PATH.
RUN cd aiter \
    && if [ "$BUILD_AITER_ALL" != "1" ]; then \
           GPU_ARCHS=$GPU_ARCH_LIST python setup.py develop; \
       elif [ "$BUILD_LLVM" = "1" ]; then \
           HIP_CLANG_PATH=/sgl-workspace/llvm-project/build/bin/ PREBUILD_KERNELS=1 GPU_ARCHS=$GPU_ARCH_LIST python setup.py develop; \
       else \
           PREBUILD_KERNELS=1 GPU_ARCHS=$GPU_ARCH_LIST python setup.py develop; \
       fi
|
||||
|
||||
# -----------------------
# Triton
# Only when BUILD_TRITON=1: replace the installed triton with a source build at TRITON_COMMIT.
RUN if [ "$BUILD_TRITON" = "1" ]; then \
        pip uninstall -y triton \
        && git clone ${TRITON_REPO} \
        && cd triton && git checkout ${TRITON_COMMIT} \
        && cd python && python setup.py install; \
    fi
|
||||
|
||||
# -----------------------
# Build vLLM
# Only when BUILD_VLLM=1. VLLM_BRANCH is passed to `git checkout`; its default is a commit hash.
ARG VLLM_REPO=https://github.com/ROCm/vllm.git
ARG VLLM_BRANCH=9f6b92db47c3444b7a7d67451ba0c3a2d6af4c2c
RUN if [ "$BUILD_VLLM" = "1" ]; then \
        git clone ${VLLM_REPO} \
        && cd vllm && git checkout ${VLLM_BRANCH} \
        && python -m pip install -r requirements/rocm.txt \
        && python setup.py clean --all \
        && python setup.py develop; \
    fi
|
||||
|
||||
# -----------------------
# Build Mooncake
# The Go toolchain unpacked below lives in /usr/local/go; expose its binaries on PATH.
ENV PATH=$PATH:/usr/local/go/bin

# Only when BUILD_MOONCAKE=1: install build/RDMA/MPI packages, check out Mooncake at
# MOONCAKE_COMMIT, run its dependency script, install Go 1.22.2 and build with etcd support.
# Uses apt-get (the scripting front end) instead of apt, and removes the apt package
# lists in the same layer so they are not shipped in the image.
RUN if [ "$BUILD_MOONCAKE" = "1" ]; then \
        apt-get update && apt-get install -y zip unzip wget && \
        apt-get install -y gcc make libtool autoconf librdmacm-dev rdmacm-utils infiniband-diags ibverbs-utils perftest ethtool libibverbs-dev rdma-core && \
        apt-get install -y openssh-server openmpi-bin openmpi-common libopenmpi-dev && \
        git clone ${MOONCAKE_REPO} && \
        cd Mooncake && \
        git checkout ${MOONCAKE_COMMIT} && \
        git submodule update --init --recursive && \
        bash dependencies.sh -y && \
        rm -rf /usr/local/go && \
        wget https://go.dev/dl/go1.22.2.linux-amd64.tar.gz && \
        tar -C /usr/local -xzf go1.22.2.linux-amd64.tar.gz && \
        rm go1.22.2.linux-amd64.tar.gz && \
        mkdir -p build && \
        cd build && \
        cmake .. -DUSE_ETCD=ON && \
        make -j "$(nproc)" && make install && \
        rm -rf /var/lib/apt/lists/*; \
    fi
|
||||
|
||||
|
||||
# -----------------------
# Build SGLang
# BUILD_TYPE=srt installs the "srt_hip" extra; any other value installs "all_hip".
ARG BUILD_TYPE=all

RUN pip install IPython \
    && pip install orjson \
    && pip install python-multipart \
    && pip install torchao==0.9.0 \
    && pip install pybind11

# Remove any sgl_kernel/sglang already present in the base image before building from source.
RUN pip uninstall -y sgl_kernel sglang
# Clone SGLang, check out the requested ref, build sgl-kernel for the selected GPU
# architecture using the ROCm pyproject, then install the python package in editable mode.
# Both sides of the branch comparison are quoted: with the right-hand side unquoted,
# an empty or whitespace-containing SGL_DEFAULT made `[` fail with a syntax error.
RUN git clone ${SGL_REPO} \
    && cd sglang \
    && if [ "${SGL_BRANCH}" = "${SGL_DEFAULT}" ]; then \
           echo "Using ${SGL_DEFAULT}, default branch."; \
           git checkout ${SGL_DEFAULT}; \
       else \
           echo "Using ${SGL_BRANCH} branch."; \
           git checkout ${SGL_BRANCH}; \
       fi \
    && cd sgl-kernel \
    && rm -f pyproject.toml \
    && mv pyproject_rocm.toml pyproject.toml \
    && AMDGPU_TARGET=$GPU_ARCH_LIST python setup_rocm.py install \
    && cd .. \
    && if [ "$BUILD_TYPE" = "srt" ]; then \
           python -m pip --no-cache-dir install -e "python[srt_hip]" ${NO_DEPS_FLAG}; \
       else \
           python -m pip --no-cache-dir install -e "python[all_hip]" ${NO_DEPS_FLAG}; \
       fi
|
||||
|
||||
# Remove all items from pip's cache.
RUN python -m pip cache purge
|
||||
|
||||
# Copy config files to support MI300X in virtualized environments (MI300X_VF). Symlinks will not be created in image build.
# For every regular file under the two config directories whose name contains "MI300X",
# write a copy whose path has the first "MI300X" replaced by "MI300X_VF".
RUN find \
        /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
        /sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
        -type f -name '*MI300X*' \
    | xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}
|
||||
|
||||
# Performance environment variables.
ENV HIP_FORCE_DEV_KERNARG=1 \
    HSA_NO_SCRATCH_RECLAIM=1 \
    SGLANG_SET_CPU_AFFINITY=1 \
    SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 \
    NCCL_MIN_NCHANNELS=112

ENV SGLANG_USE_AITER=1 \
    SGLANG_MOE_PADDING=1 \
    VLLM_FP8_PADDING=1 \
    VLLM_FP8_ACT_PADDING=1 \
    VLLM_FP8_WEIGHT_PADDING=1 \
    VLLM_FP8_REDUCE_CONV=1 \
    TORCHINDUCTOR_MAX_AUTOTUNE=1 \
    TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1
|
||||
|
||||
# Default command: start a bash shell when the container is run without arguments.
CMD ["/bin/bash"]
|
||||
78
docker/Dockerfile.router
Normal file
78
docker/Dockerfile.router
Normal file
@@ -0,0 +1,78 @@
|
||||
######################## BASE IMAGE ##########################
FROM ubuntu:24.04 AS base

ARG PYTHON_VERSION=3.12

# set the environment variables
ENV PATH="/root/.local/bin:${PATH}" \
    DEBIAN_FRONTEND=noninteractive

# uv environment variables
ENV UV_HTTP_TIMEOUT=500 \
    VIRTUAL_ENV="/opt/venv" \
    UV_PYTHON_INSTALL_DIR=/opt/uv/python \
    UV_LINK_MODE="copy"
# Kept as its own instruction: $VIRTUAL_ENV must already be set when this line is expanded.
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
|
||||
|
||||
|
||||
# install dependencies
# Preseed the tzdata debconf answers, install curl, and drop the apt lists and
# package cache in the same layer.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt update -y && apt install -y curl \
    && rm -rf /var/lib/apt/lists/* && apt clean
|
||||
|
||||
# install uv
# Download the installer script (follow redirects, fail on HTTP errors, quiet except for errors) and run it with sh.
RUN curl --location --silent --show-error --fail https://astral.sh/uv/install.sh | sh
|
||||
|
||||
# install python
# Create the virtual environment at VIRTUAL_ENV with the requested Python version (--seed is passed to uv).
RUN uv venv --seed --python ${PYTHON_VERSION} ${VIRTUAL_ENV}
|
||||
|
||||
######################### BUILD IMAGE #########################
FROM base AS build-image

# Git ref of sgl-project/sglang to build the router from.
ARG SGLANG_REPO_REF=main

# set the environment variables
# Puts /root/.cargo/bin first on PATH for the cargo/rustc calls below.
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
# install dependencies
# Packages needed by the build steps of this stage; apt lists and cache are dropped in the same layer.
RUN apt update -y \
    && apt install -y \
        git \
        build-essential \
        libssl-dev \
        pkg-config \
        protobuf-compiler \
    && rm -rf /var/lib/apt/lists/* && apt clean
|
||||
|
||||
# install rustup from rustup.rs
# Non-interactive install (-y), followed by a sanity check that rustc, cargo and protoc run.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y \
    && rustc --version \
    && cargo --version \
    && protoc --version
|
||||
|
||||
# pull the github repository
# Fetch exactly the requested ref with depth 1 and check it out.
# The previous `git clone --depth=1` only fetched the tip of the default branch, so
# `git checkout ${SGLANG_REPO_REF}` failed for any other branch, tag or commit.
# The checkout ends up on a detached HEAD at the fetched ref.
RUN git init /opt/sglang \
    && cd /opt/sglang \
    && git remote add origin https://github.com/sgl-project/sglang.git \
    && git fetch --depth=1 origin "${SGLANG_REPO_REF}" \
    && git checkout FETCH_HEAD
|
||||
|
||||
# working directory
WORKDIR /opt/sglang/sgl-router

# build the rust dependencies
# Compile in release mode, build the Python distribution with uv (the next stage
# copies the wheel from dist/), then drop /root/.cache in the same layer.
RUN cargo build --release && uv build && rm -rf /root/.cache
|
||||
|
||||
######################### ROUTER IMAGE #########################
FROM base AS router-image

# Copy the built package from the build image
COPY --from=build-image /opt/sglang/sgl-router/dist/*.whl dist/

# Install the wheel and clean up in the SAME layer.
# Deleting /root/.cache in a later RUN does not shrink the image, because the files
# are already stored in the layer that created them. curl is purged so it is absent
# from the final filesystem (it was installed in the base stage, so this does not
# reclaim that layer's space).
RUN uv pip install --force-reinstall dist/*.whl \
    && rm -rf /root/.cache \
    && apt purge -y --auto-remove curl

# Set the entrypoint to the main command
ENTRYPOINT ["python3", "-m", "sglang_router.launch_router"]
|
||||
6
docker/Dockerfile.sagemaker
Normal file
6
docker/Dockerfile.sagemaker
Normal file
@@ -0,0 +1,6 @@
|
||||
FROM lmsysorg/sglang:latest

# Entrypoint script that translates environment variables into launch_server flags.
COPY serve /usr/bin/serve
# 755 instead of 777: the entrypoint must be executable by everyone but should not be
# world-writable, otherwise any process in the container could replace it.
RUN chmod 755 /usr/bin/serve

ENTRYPOINT [ "/usr/bin/serve" ]
|
||||
49
docker/Dockerfile.xeon
Normal file
49
docker/Dockerfile.xeon
Normal file
@@ -0,0 +1,49 @@
|
||||
FROM ubuntu:24.04
# All RUN instructions below are executed with bash.
SHELL ["/bin/bash", "-c"]

# Versions / git ref used by the build steps below.
ARG VER_SGLANG=main
ARG VER_TORCH=2.7.1
ARG VER_TORCHVISION=0.22.1
ARG VER_TRITON=3.3.1
|
||||
|
||||
# Upgrade the base system and install the build tools.
# The apt package lists are removed in the same layer so they are not shipped in the image.
RUN apt-get update && \
    apt-get full-upgrade -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
        ca-certificates \
        git \
        curl \
        wget \
        vim \
        gcc \
        g++ \
        make && \
    rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /sgl-workspace

# Install Miniforge into ./miniforge3 and add the conda packages used at runtime.
# The stray `-O` was dropped: the single URL already has an explicit output file via
# `-o miniforge.sh`, so `-O` was an extra output option with no URL to apply to.
# `conda clean -afy` removes package tarballs and caches in the same layer.
RUN curl -fsSL -v -o miniforge.sh https://github.com/conda-forge/miniforge/releases/download/24.11.3-2/Miniforge3-24.11.3-2-Linux-x86_64.sh && \
    bash miniforge.sh -b -p ./miniforge3 && \
    rm -f miniforge.sh && \
    . miniforge3/bin/activate && \
    conda install -y libsqlite==3.48.0 gperftools tbb libnuma numactl && \
    conda clean -afy
|
||||
|
||||
# Put the Miniforge binaries first on PATH and point CONDA_PREFIX at the install.
ENV PATH=/sgl-workspace/miniforge3/bin:/sgl-workspace/miniforge3/condabin:${PATH} \
    PIP_ROOT_USER_ACTION=ignore \
    CONDA_PREFIX=/sgl-workspace/miniforge3
|
||||
|
||||
# Make the PyTorch CPU wheel index the primary index, keep PyPI as an extra index,
# then install intel-openmp.
RUN pip config set global.index-url https://download.pytorch.org/whl/cpu \
    && pip config set global.extra-index-url https://pypi.org/simple \
    && pip install intel-openmp
|
||||
|
||||
# Clone SGLang at VER_SGLANG, install it in editable mode with the all_cpu extra,
# force the requested torch/torchvision/triton versions, then build sgl-kernel
# using the CPU pyproject.
RUN git clone https://github.com/sgl-project/sglang.git \
    && cd sglang \
    && git checkout ${VER_SGLANG} \
    && pip install -e "python[all_cpu]" \
    && pip install torch==${VER_TORCH} torchvision==${VER_TORCHVISION} triton==${VER_TRITON} --force-reinstall \
    && cd sgl-kernel \
    && cp pyproject_cpu.toml pyproject.toml \
    && pip install -v .
|
||||
|
||||
# Runtime settings: CPU engine flag, plus the Intel OpenMP, tcmalloc and tbbmalloc
# libraries from the Miniforge install preloaded into every process.
ENV SGLANG_USE_CPU_ENGINE=1 \
    LD_PRELOAD=/sgl-workspace/miniforge3/lib/libiomp5.so:/sgl-workspace/miniforge3/lib/libtcmalloc.so:/sgl-workspace/miniforge3/lib/libtbbmalloc.so.2

WORKDIR /sgl-workspace/sglang
|
||||
35
docker/compose.yaml
Normal file
35
docker/compose.yaml
Normal file
@@ -0,0 +1,35 @@
|
||||
# Single-container SGLang server.
services:
  sglang:
    image: lmsysorg/sglang:latest
    container_name: sglang
    volumes:
      - ${HOME}/.cache/huggingface:/root/.cache/huggingface
      # If you use ModelScope, you also need to mount this directory:
      # - ${HOME}/.cache/modelscope:/root/.cache/modelscope
    restart: always
    network_mode: host # required by RDMA
    privileged: true # required by RDMA
    # Alternatively, publish only port 30000:
    # ports:
    #   - 30000:30000
    environment:
      HF_TOKEN: <secret>
      # If you use ModelScope to download models, also set:
      # SGLANG_USE_MODELSCOPE: "true"
    entrypoint: python3 -m sglang.launch_server
    command: >-
      --model-path meta-llama/Llama-3.1-8B-Instruct
      --host 0.0.0.0
      --port 30000
    ulimits:
      memlock: -1
      stack: 67108864
    ipc: host
    healthcheck:
      test:
        - CMD-SHELL
        - "curl -f http://localhost:30000/health || exit 1"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids:
                - "0"
              capabilities:
                - gpu
|
||||
103
docker/k8s-sglang-distributed-sts.yaml
Normal file
103
docker/k8s-sglang-distributed-sts.yaml
Normal file
@@ -0,0 +1,103 @@
|
||||
# Two-node SGLang example

apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: distributed-sglang
spec:
  replicas: 2 # number of nodes/pods to run distributed sglang
  selector:
    matchLabels:
      app: distributed-sglang
  serviceName: ""
  template:
    metadata:
      labels:
        app: distributed-sglang
    spec:
      hostNetwork: true
      containers:
        - name: sglang-container
          image: docker.io/lmsysorg/sglang:latest
          imagePullPolicy: Always # image may be replaced by official CI versioned image
          command:
            - /bin/bash
            - -c
          # Please modify the sglang serving arguments below as necessary.
          # NOTE: --expert-parallel-size is for MoE models like DeepSeek-R1
          args:
            - |
              python3 -m sglang.launch_server \
              --model /llm-folder \
              --dist-init-addr sglang-master-pod:5000 \
              --tensor-parallel-size 16 \
              --nnodes 2 \
              --node-rank $POD_INDEX \
              --trust-remote-code \
              --host 0.0.0.0 \
              --port 8000 \
              --enable-metrics \
              --expert-parallel-size 16
          env:
            - name: POD_INDEX # reflects the node-rank
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.labels['apps.kubernetes.io/pod-index']
            - name: NCCL_DEBUG
              value: INFO
          resources:
            limits:
              nvidia.com/gpu: "8"
            requests:
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
            - name: llm
              mountPath: /llm-folder
          securityContext:
            privileged: true # to leverage RDMA/InfiniBand device, co-work with HostNetwork=true
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: 10Gi
        - name: llm
          hostPath:
            path: /llm-folder # replace with PVC or hostPath with your model weights
            type: DirectoryOrCreate
        #- name: llm
        #  persistentVolumeClaim:
        #    claimName: llm-pvc
|
||||
---
|
||||
# Service that resolves to pod index 0; its name is the host used in --dist-init-addr.
apiVersion: v1
kind: Service
metadata:
  name: sglang-master-pod
spec:
  type: ClusterIP
  selector:
    app: distributed-sglang
    apps.kubernetes.io/pod-index: "0"
  ports:
    - name: dist-port
      port: 5000
      targetPort: 5000
|
||||
---
|
||||
# The serving service: exposes pod index 0 through a NodePort.
apiVersion: v1
kind: Service
metadata:
  name: sglang-serving-on-master
spec:
  type: NodePort
  selector:
    app: distributed-sglang
    apps.kubernetes.io/pod-index: "0"
  ports:
    - name: serving
      port: 8000
      targetPort: 8000
    - name: metrics
      port: 8080
      targetPort: 8080
|
||||
117
docker/k8s-sglang-service.yaml
Normal file
117
docker/k8s-sglang-service.yaml
Normal file
@@ -0,0 +1,117 @@
|
||||
# Claim mounted by the Deployment below as the Hugging Face cache.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: llama-31-8b-sglang
spec:
  storageClassName: default # change this to your preferred storage class
  volumeMode: Filesystem
  accessModes: [ReadWriteMany]
  resources:
    requests:
      storage: 30Gi
|
||||
---
|
||||
# RuntimeClass referenced by runtimeClassName in the Deployment below.
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: nvidia
handler: nvidia
|
||||
---
|
||||
# Single-replica SGLang server for Llama-3.1-8B-Instruct.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: meta-llama-31-8b-instruct-sglang
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: meta-llama-31-8b-instruct-sglang
  template:
    metadata:
      labels:
        app: meta-llama-31-8b-instruct-sglang
        model: meta-llama-31-8b-instruct
        engine: sglang
    spec:
      restartPolicy: Always
      runtimeClassName: nvidia
      containers:
        - name: meta-llama-31-8b-instruct-sglang
          image: docker.io/lmsysorg/sglang:latest
          imagePullPolicy: Always # IfNotPresent or Never
          ports:
            - containerPort: 30000
          command:
            - python3
            - -m
            - sglang.launch_server
          args:
            - --model-path
            - meta-llama/Llama-3.1-8B-Instruct
            - --host
            - 0.0.0.0
            - --port
            - "30000"
          env:
            - name: HF_TOKEN
              value: <secret>
          resources:
            limits:
              nvidia.com/gpu: 1
              cpu: 8
              memory: 40Gi
            requests:
              cpu: 2
              memory: 16Gi
              nvidia.com/gpu: 1
          volumeMounts:
            - name: shm
              mountPath: /dev/shm
            - name: hf-cache
              mountPath: /root/.cache/huggingface
            - name: localtime
              mountPath: /etc/localtime
              readOnly: true
          # Liveness uses /health; readiness uses /health_generate.
          livenessProbe:
            httpGet:
              path: /health
              port: 30000
            initialDelaySeconds: 120
            periodSeconds: 15
            timeoutSeconds: 10
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /health_generate
              port: 30000
            initialDelaySeconds: 120
            periodSeconds: 15
            timeoutSeconds: 10
            failureThreshold: 3
            successThreshold: 1
      volumes:
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 10Gi
        - name: hf-cache
          persistentVolumeClaim:
            claimName: llama-31-8b-sglang
        - name: localtime
          hostPath:
            path: /etc/localtime
            type: File
|
||||
---
|
||||
# Exposes the Deployment's container port 30000 on service port 80.
apiVersion: v1
kind: Service
metadata:
  name: meta-llama-31-8b-instruct-sglang
spec:
  type: LoadBalancer # change to ClusterIP if needed
  selector:
    app: meta-llama-31-8b-instruct-sglang
  ports:
    - protocol: TCP
      port: 80 # port on host
      targetPort: 30000 # port in container
|
||||
31
docker/serve
Executable file
31
docker/serve
Executable file
@@ -0,0 +1,31 @@
|
||||
#!/bin/bash
# Container entrypoint (installed as /usr/bin/serve by docker/Dockerfile.sagemaker).
# Builds the sglang.launch_server command line from optional environment variables
# and starts the server on 0.0.0.0:8080 with the model at /opt/ml/model.
#
# Optional environment variables and the flag each one adds when non-empty:
#   TENSOR_PARALLEL_DEGREE -> --tp-size
#   DATA_PARALLEL_DEGREE   -> --dp-size
#   EXPERT_PARALLEL_DEGREE -> --ep-size
#   MEM_FRACTION_STATIC    -> --mem-fraction-static
#   QUANTIZATION           -> --quantization
#   CHUNKED_PREFILL_SIZE   -> --chunked-prefill-size

echo "Starting server"

SERVER_ARGS="--host 0.0.0.0 --port 8080"

if [ -n "$TENSOR_PARALLEL_DEGREE" ]; then
    SERVER_ARGS="${SERVER_ARGS} --tp-size ${TENSOR_PARALLEL_DEGREE}"
fi

if [ -n "$DATA_PARALLEL_DEGREE" ]; then
    SERVER_ARGS="${SERVER_ARGS} --dp-size ${DATA_PARALLEL_DEGREE}"
fi

if [ -n "$EXPERT_PARALLEL_DEGREE" ]; then
    SERVER_ARGS="${SERVER_ARGS} --ep-size ${EXPERT_PARALLEL_DEGREE}"
fi

if [ -n "$MEM_FRACTION_STATIC" ]; then
    SERVER_ARGS="${SERVER_ARGS} --mem-fraction-static ${MEM_FRACTION_STATIC}"
fi

if [ -n "$QUANTIZATION" ]; then
    SERVER_ARGS="${SERVER_ARGS} --quantization ${QUANTIZATION}"
fi

if [ -n "$CHUNKED_PREFILL_SIZE" ]; then
    SERVER_ARGS="${SERVER_ARGS} --chunked-prefill-size ${CHUNKED_PREFILL_SIZE}"
fi

# `exec` replaces this shell with the server process, so the server becomes the
# container's main process and receives stop signals (e.g. SIGTERM) directly instead
# of being left behind a bash parent that does not forward them.
# SERVER_ARGS is intentionally unquoted so it is split into separate arguments.
exec python3 -m sglang.launch_server --model-path /opt/ml/model $SERVER_ARGS
|
||||
Reference in New Issue
Block a user