[Dockerfile] Multi-arch support for ROCm (#7902)
Co-authored-by: Lin, Soga <soga.lin@amd.com> Co-authored-by: HaiShaw <hixiao@gmail.com>
This commit is contained in:
@@ -1,46 +1,96 @@
|
|||||||
# Usage (to build SGLang ROCm docker image):
|
# Usage (to build SGLang ROCm docker image):
|
||||||
# docker build --build-arg SGL_BRANCH=v0.4.9.post2 -t v0.4.9.post2-rocm630 -f Dockerfile.rocm .
|
# docker build --build-arg SGL_BRANCH=v0.4.9.post1 --build-arg GPU_ARCH=gfx942 -t v0.4.9.post1-rocm630-mi30x -f Dockerfile.rocm .
|
||||||
|
# docker build --build-arg SGL_BRANCH=v0.4.9.post1 --build-arg GPU_ARCH=gfx950 -t v0.4.9.post1-rocm700-mi35x -f Dockerfile.rocm .
|
||||||
|
|
||||||
# default base image
|
# Default base images
|
||||||
ARG BASE_IMAGE="rocm/sgl-dev:vllm20250114"
|
ARG BASE_IMAGE_950="rocm/7.0-preview:rocm7.0_preview_ubuntu_22.04_vllm_0.8.5_mi35X_prealpha"
|
||||||
|
ARG BASE_IMAGE_942="rocm/sgl-dev:vllm20250114"
|
||||||
|
|
||||||
FROM $BASE_IMAGE AS base
|
# Declare GPU_ARCH before the first FROM so it can be referenced by the `FROM ${GPU_ARCH}` stage selection below
|
||||||
USER root
|
ARG GPU_ARCH=gfx950
|
||||||
|
|
||||||
WORKDIR /sgl-workspace
|
# ===============================
|
||||||
ARG BUILD_TYPE=all
|
# Base image 942 and args
|
||||||
ARG SGL_REPO="https://github.com/sgl-project/sglang"
|
FROM $BASE_IMAGE_942 AS gfx942
|
||||||
ENV SGL_DEFAULT="main"
|
ENV BUILD_VLLM="0"
|
||||||
|
ENV BUILD_TRITON="1"
|
||||||
|
ENV BUILD_AITER_ALL="1"
|
||||||
|
ENV AITER_COMMIT="v0.1.4"
|
||||||
|
|
||||||
|
# ===============================
|
||||||
|
# Base image 950 and args
|
||||||
|
FROM $BASE_IMAGE_950 AS gfx950
|
||||||
|
ENV BUILD_VLLM="0"
|
||||||
|
ENV BUILD_TRITON="0"
|
||||||
|
ENV BUILD_AITER_ALL="1"
|
||||||
|
ENV AITER_COMMIT="v0.1.4"
|
||||||
|
|
||||||
|
# ===============================
|
||||||
|
# Chosen arch and args
|
||||||
|
FROM ${GPU_ARCH}
|
||||||
|
|
||||||
|
# Redeclare GPU_ARCH inside this stage: an ARG declared before FROM is only visible to FROM lines, not to instructions within a stage
|
||||||
|
ARG GPU_ARCH=gfx950
|
||||||
|
ENV GPU_ARCH_LIST=${GPU_ARCH:-${PYTORCH_ROCM_ARCH}}
|
||||||
|
|
||||||
|
ARG SGL_REPO="https://github.com/sgl-project/sglang.git"
|
||||||
|
ARG SGL_DEFAULT="main"
|
||||||
ARG SGL_BRANCH=${SGL_DEFAULT}
|
ARG SGL_BRANCH=${SGL_DEFAULT}
|
||||||
|
|
||||||
ARG TRITON_REPO="https://github.com/ROCm/triton.git"
|
ARG TRITON_REPO="https://github.com/ROCm/triton.git"
|
||||||
ARG TRITON_COMMIT="improve_fa_decode_3.0.0"
|
ARG TRITON_COMMIT="improve_fa_decode_3.0.0"
|
||||||
|
|
||||||
|
|
||||||
ARG AITER_REPO="https://github.com/ROCm/aiter.git"
|
ARG AITER_REPO="https://github.com/ROCm/aiter.git"
|
||||||
ARG AITER_COMMIT="v0.1.3"
|
|
||||||
|
|
||||||
RUN git clone ${SGL_REPO} \
|
USER root
|
||||||
&& cd sglang \
|
|
||||||
&& if [ "${SGL_BRANCH}" = ${SGL_DEFAULT} ]; then \
|
|
||||||
echo "Using ${SGL_DEFAULT}, default branch."; \
|
|
||||||
else \
|
|
||||||
echo "Using ${SGL_BRANCH} branch."; \
|
|
||||||
git checkout ${SGL_BRANCH}; \
|
|
||||||
fi \
|
|
||||||
&& cd sgl-kernel \
|
|
||||||
&& rm -f pyproject.toml \
|
|
||||||
&& mv pyproject_rocm.toml pyproject.toml \
|
|
||||||
&& python setup_rocm.py install \
|
|
||||||
&& cd .. \
|
|
||||||
&& if [ "$BUILD_TYPE" = "srt" ]; then \
|
|
||||||
python -m pip --no-cache-dir install -e "python[srt_hip]"; \
|
|
||||||
else \
|
|
||||||
python -m pip --no-cache-dir install -e "python[all_hip]"; \
|
|
||||||
fi
|
|
||||||
|
|
||||||
RUN cp -r /sgl-workspace/sglang /sglang
|
# Install some basic utilities
|
||||||
RUN python -m pip cache purge
|
RUN python -m pip install --upgrade pip && pip install setuptools_scm
|
||||||
|
RUN apt-get purge -y sccache; python -m pip uninstall -y sccache; rm -f "$(which sccache)"
|
||||||
|
|
||||||
|
WORKDIR /sgl-workspace
|
||||||
|
|
||||||
|
# -----------------------
|
||||||
|
# AITER
|
||||||
|
RUN pip uninstall -y aiter
|
||||||
|
RUN git clone ${AITER_REPO} \
|
||||||
|
&& cd aiter \
|
||||||
|
&& git checkout ${AITER_COMMIT} \
|
||||||
|
&& git submodule update --init --recursive
|
||||||
|
RUN cd aiter \
|
||||||
|
&& if [ "$BUILD_AITER_ALL" = "1" ]; then \
|
||||||
|
PREBUILD_KERNELS=1 GPU_ARCHS=$GPU_ARCH_LIST python setup.py develop; \
|
||||||
|
else \
|
||||||
|
GPU_ARCHS=$GPU_ARCH_LIST python setup.py develop; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
# -----------------------
|
||||||
|
# Triton
|
||||||
|
RUN if [ "$BUILD_TRITON" = "1" ]; then \
|
||||||
|
pip uninstall -y triton \
|
||||||
|
&& git clone ${TRITON_REPO} \
|
||||||
|
&& cd triton \
|
||||||
|
&& git checkout ${TRITON_COMMIT} \
|
||||||
|
&& cd python \
|
||||||
|
&& python setup.py install; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
# -----------------------
|
||||||
|
# Build vLLM
|
||||||
|
ARG VLLM_REPO="https://github.com/ROCm/vllm.git"
|
||||||
|
ARG VLLM_BRANCH="9f6b92db47c3444b7a7d67451ba0c3a2d6af4c2c"
|
||||||
|
RUN if [ "$BUILD_VLLM" = "1" ]; then \
|
||||||
|
git clone ${VLLM_REPO} \
|
||||||
|
&& cd vllm \
|
||||||
|
&& git checkout ${VLLM_BRANCH} \
|
||||||
|
&& python -m pip install -r requirements/rocm.txt \
|
||||||
|
&& python setup.py clean --all \
|
||||||
|
&& python setup.py develop; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
# -----------------------
|
||||||
|
# Build SGLang
|
||||||
|
ARG BUILD_TYPE=all
|
||||||
|
|
||||||
RUN pip install IPython \
|
RUN pip install IPython \
|
||||||
&& pip install orjson \
|
&& pip install orjson \
|
||||||
@@ -48,18 +98,28 @@ RUN pip install IPython \
|
|||||||
&& pip install torchao \
|
&& pip install torchao \
|
||||||
&& pip install pybind11
|
&& pip install pybind11
|
||||||
|
|
||||||
RUN pip uninstall -y triton
|
RUN pip uninstall -y sgl_kernel sglang
|
||||||
RUN git clone ${TRITON_REPO} \
|
RUN git clone ${SGL_REPO} \
|
||||||
&& cd triton \
|
&& cd sglang \
|
||||||
&& git checkout ${TRITON_COMMIT} \
|
&& if [ "${SGL_BRANCH}" = ${SGL_DEFAULT} ]; then \
|
||||||
&& cd python \
|
echo "Using ${SGL_DEFAULT}, default branch."; \
|
||||||
&& python3 setup.py install
|
git checkout ${SGL_DEFAULT}; \
|
||||||
|
else \
|
||||||
|
echo "Using ${SGL_BRANCH} branch."; \
|
||||||
|
git checkout ${SGL_BRANCH}; \
|
||||||
|
fi \
|
||||||
|
&& cd sgl-kernel \
|
||||||
|
&& rm -f pyproject.toml \
|
||||||
|
&& mv pyproject_rocm.toml pyproject.toml \
|
||||||
|
&& AMDGPU_TARGET=$GPU_ARCH_LIST python setup_rocm.py install \
|
||||||
|
&& cd .. \
|
||||||
|
&& if [ "$BUILD_TYPE" = "srt" ]; then \
|
||||||
|
python -m pip --no-cache-dir install -e "python[srt_hip]"; \
|
||||||
|
else \
|
||||||
|
python -m pip --no-cache-dir install -e "python[all_hip]"; \
|
||||||
|
fi
|
||||||
|
|
||||||
RUN git clone ${AITER_REPO} \
|
RUN python -m pip cache purge
|
||||||
&& cd aiter \
|
|
||||||
&& git checkout ${AITER_COMMIT} \
|
|
||||||
&& git submodule update --init --recursive \
|
|
||||||
&& PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop
|
|
||||||
|
|
||||||
# Copy config files to support MI300X in virtualized environments (MI300X_VF). Symlinks will not be created in image build.
|
# Copy config files to support MI300X in virtualized environments (MI300X_VF). Symlinks will not be created in image build.
|
||||||
RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
|
RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
|
||||||
@@ -67,13 +127,13 @@ RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
|
|||||||
-type f -name '*MI300X*' | xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}
|
-type f -name '*MI300X*' | xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}
|
||||||
|
|
||||||
# Performance environment variable.
|
# Performance environment variable.
|
||||||
|
|
||||||
ENV HIP_FORCE_DEV_KERNARG=1
|
ENV HIP_FORCE_DEV_KERNARG=1
|
||||||
ENV HSA_NO_SCRATCH_RECLAIM=1
|
ENV HSA_NO_SCRATCH_RECLAIM=1
|
||||||
ENV SGLANG_SET_CPU_AFFINITY=1
|
ENV SGLANG_SET_CPU_AFFINITY=1
|
||||||
ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
|
ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
|
||||||
ENV NCCL_MIN_NCHANNELS=112
|
ENV NCCL_MIN_NCHANNELS=112
|
||||||
|
|
||||||
|
ENV SGLANG_USE_AITER=1
|
||||||
ENV SGLANG_MOE_PADDING=1
|
ENV SGLANG_MOE_PADDING=1
|
||||||
ENV VLLM_FP8_PADDING=1
|
ENV VLLM_FP8_PADDING=1
|
||||||
ENV VLLM_FP8_ACT_PADDING=1
|
ENV VLLM_FP8_ACT_PADDING=1
|
||||||
|
|||||||
@@ -79,7 +79,6 @@ blackwell = [
|
|||||||
srt_hip = [
|
srt_hip = [
|
||||||
"sglang[runtime_common]",
|
"sglang[runtime_common]",
|
||||||
"torch",
|
"torch",
|
||||||
"vllm==0.6.7.dev2",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
# xpu is not enabled in public vllm and torch whl,
|
# xpu is not enabled in public vllm and torch whl,
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ _use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
|
|||||||
|
|
||||||
if _use_aiter:
|
if _use_aiter:
|
||||||
import aiter
|
import aiter
|
||||||
from aiter import gemm_a8w8_blockscale_CK, get_hip_quant
|
from aiter import gemm_a8w8_blockscale, get_hip_quant
|
||||||
|
|
||||||
aiter_per1x128_quant = get_hip_quant(aiter.QuantType.per_1x128)
|
aiter_per1x128_quant = get_hip_quant(aiter.QuantType.per_1x128)
|
||||||
|
|
||||||
@@ -274,7 +274,7 @@ def aiter_w8a8_block_fp8_linear(
|
|||||||
output_shape = [*input.shape[:-1], weight.shape[0]]
|
output_shape = [*input.shape[:-1], weight.shape[0]]
|
||||||
|
|
||||||
q_input, x_scale = aiter_per1x128_quant(input_2d, quant_dtype=aiter.dtypes.fp8)
|
q_input, x_scale = aiter_per1x128_quant(input_2d, quant_dtype=aiter.dtypes.fp8)
|
||||||
output = gemm_a8w8_blockscale_CK(
|
output = gemm_a8w8_blockscale(
|
||||||
q_input, weight, x_scale, weight_scale, dtype=input.dtype
|
q_input, weight, x_scale, weight_scale, dtype=input.dtype
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user