[docker] added rdma support (#3619)

This commit is contained in:
Shenggui Li
2025-02-17 15:36:16 +08:00
committed by GitHub
parent d03c4c25a7
commit c9565e49e7
7 changed files with 39 additions and 11 deletions

View File

@@ -14,6 +14,7 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
&& update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 \
&& update-alternatives --set python3 /usr/bin/python3.10 && apt install python3.10-distutils -y \
&& apt install curl git sudo libibverbs-dev -y \
&& apt install -y rdma-core infiniband-diags openssh-server perftest ibverbs-providers libibumad3 libibverbs1 libnl-3-200 libnl-route-3-200 librdmacm1 \
&& curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3 get-pip.py \
&& python3 --version \
&& python3 -m pip --version \

View File

@@ -21,6 +21,7 @@ RUN apt-get update && apt-get install -y \
pkg-config \
libssl-dev \
bear \
&& apt install -y rdma-core infiniband-diags openssh-server perftest ibverbs-providers libibumad3 libibverbs1 libnl-3-200 libnl-route-3-200 librdmacm1 \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean

View File

@@ -20,6 +20,8 @@ ARG TRITON_COMMIT="improve_fa_decode_3.0.0"
ARG ATER_REPO="https://github.com/HaiShaw/ater"
ARG CK_COMMITS="fa05ae"
RUN apt install -y rdma-core infiniband-diags openssh-server perftest ibverbs-providers libibumad3 libibverbs1 libnl-3-200 libnl-route-3-200 librdmacm1
RUN git clone ${SGL_REPO} \
&& cd sglang \
&& if [ "${SGL_BRANCH}" = ${SGL_DEFAULT} ]; then \

View File

@@ -7,7 +7,8 @@ services:
# If you use modelscope, you need mount this directory
# - ${HOME}/.cache/modelscope:/root/.cache/modelscope
restart: always
network_mode: host
network_mode: host # required by RDMA
privileged: true # required by RDMA
# Or you can only publish port 30000
# ports:
# - 30000:30000
@@ -16,8 +17,7 @@ services:
# if you use modelscope to download model, you need set this environment
# - SGLANG_USE_MODELSCOPE: true
entrypoint: python3 -m sglang.launch_server
command:
--model-path meta-llama/Llama-3.1-8B-Instruct
command: --model-path meta-llama/Llama-3.1-8B-Instruct
--host 0.0.0.0
--port 30000
ulimits:
@@ -31,5 +31,5 @@ services:
reservations:
devices:
- driver: nvidia
device_ids: ['0']
device_ids: ["0"]
capabilities: [gpu]