From a562c8a35c93d70374e2d3b57c12f66718113f17 Mon Sep 17 00:00:00 2001
From: mqhc2020
Date: Mon, 14 Jul 2025 14:13:09 +0800
Subject: [PATCH] [Dockerfile] Multi-arch support for ROCm (#7902)

Co-authored-by: Lin, Soga
Co-authored-by: HaiShaw
---
 docker/Dockerfile.rocm                        | 146 ++++++++++++------
 python/pyproject.toml                         |   1 -
 .../srt/layers/quantization/fp8_utils.py      |   4 +-
 3 files changed, 105 insertions(+), 46 deletions(-)

diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm
index 761672a32..15722b52f 100644
--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@@ -1,46 +1,96 @@
 # Usage (to build SGLang ROCm docker image):
-#   docker build --build-arg SGL_BRANCH=v0.4.9.post2 -t v0.4.9.post2-rocm630 -f Dockerfile.rocm .
+#   docker build --build-arg SGL_BRANCH=v0.4.9.post1 --build-arg GPU_ARCH=gfx942 -t v0.4.9.post1-rocm630-mi30x -f Dockerfile.rocm .
+#   docker build --build-arg SGL_BRANCH=v0.4.9.post1 --build-arg GPU_ARCH=gfx950 -t v0.4.9.post1-rocm700-mi35x -f Dockerfile.rocm .
 
-# default base image
-ARG BASE_IMAGE="rocm/sgl-dev:vllm20250114"
+# Default base images
+ARG BASE_IMAGE_950="rocm/7.0-preview:rocm7.0_preview_ubuntu_22.04_vllm_0.8.5_mi35X_prealpha"
+ARG BASE_IMAGE_942="rocm/sgl-dev:vllm20250114"
 
-FROM $BASE_IMAGE AS base
-USER root
+# Declared before the first FROM so that "FROM ${GPU_ARCH}" below can resolve it
+ARG GPU_ARCH=gfx950
 
-WORKDIR /sgl-workspace
-ARG BUILD_TYPE=all
-ARG SGL_REPO="https://github.com/sgl-project/sglang"
-ENV SGL_DEFAULT="main"
+# ===============================
+# Base image 942 and args
+FROM $BASE_IMAGE_942 AS gfx942
+ENV BUILD_VLLM="0"
+ENV BUILD_TRITON="1"
+ENV BUILD_AITER_ALL="1"
+ENV AITER_COMMIT="v0.1.4"
+
+# ===============================
+# Base image 950 and args
+FROM $BASE_IMAGE_950 AS gfx950
+ENV BUILD_VLLM="0"
+ENV BUILD_TRITON="0"
+ENV BUILD_AITER_ALL="1"
+ENV AITER_COMMIT="v0.1.4"
+
+# ===============================
+# Chosen arch and args
+FROM ${GPU_ARCH}
+
+# Re-declared inside the stage: an ARG set before FROM is not visible after it
+ARG GPU_ARCH=gfx950
+ENV GPU_ARCH_LIST=${GPU_ARCH:-${PYTORCH_ROCM_ARCH}}
+
+ARG SGL_REPO="https://github.com/sgl-project/sglang.git"
+ARG SGL_DEFAULT="main"
 ARG SGL_BRANCH=${SGL_DEFAULT}
 
 ARG TRITON_REPO="https://github.com/ROCm/triton.git"
 ARG TRITON_COMMIT="improve_fa_decode_3.0.0"
 
-
 ARG AITER_REPO="https://github.com/ROCm/aiter.git"
-ARG AITER_COMMIT="v0.1.3"
 
-RUN git clone ${SGL_REPO} \
-    && cd sglang \
-    && if [ "${SGL_BRANCH}" = ${SGL_DEFAULT} ]; then \
-         echo "Using ${SGL_DEFAULT}, default branch."; \
-       else \
-         echo "Using ${SGL_BRANCH} branch."; \
-         git checkout ${SGL_BRANCH}; \
-       fi \
-    && cd sgl-kernel \
-    && rm -f pyproject.toml \
-    && mv pyproject_rocm.toml pyproject.toml \
-    && python setup_rocm.py install \
-    && cd .. \
-    && if [ "$BUILD_TYPE" = "srt" ]; then \
-         python -m pip --no-cache-dir install -e "python[srt_hip]"; \
-       else \
-         python -m pip --no-cache-dir install -e "python[all_hip]"; \
-       fi
+USER root
 
-RUN cp -r /sgl-workspace/sglang /sglang
-RUN python -m pip cache purge
+# Install some basic utilities
+RUN python -m pip install --upgrade pip && pip install setuptools_scm
+RUN apt-get purge -y sccache; python -m pip uninstall -y sccache; rm -f "$(which sccache)"
+
+WORKDIR /sgl-workspace
+
+# -----------------------
+# AITER
+RUN pip uninstall -y aiter
+RUN git clone ${AITER_REPO} \
+    && cd aiter \
+    && git checkout ${AITER_COMMIT} \
+    && git submodule update --init --recursive
+RUN cd aiter \
+    && if [ "$BUILD_AITER_ALL" = "1" ]; then \
+         PREBUILD_KERNELS=1 GPU_ARCHS=$GPU_ARCH_LIST python setup.py develop; \
+       else \
+         GPU_ARCHS=$GPU_ARCH_LIST python setup.py develop; \
+       fi
+
+# -----------------------
+# Triton
+RUN if [ "$BUILD_TRITON" = "1" ]; then \
+        pip uninstall -y triton \
+     && git clone ${TRITON_REPO} \
+     && cd triton \
+     && git checkout ${TRITON_COMMIT} \
+     && cd python \
+     && python setup.py install; \
+    fi
+
+# -----------------------
+# Build vLLM
+ARG VLLM_REPO="https://github.com/ROCm/vllm.git"
+ARG VLLM_BRANCH="9f6b92db47c3444b7a7d67451ba0c3a2d6af4c2c"
+RUN if [ "$BUILD_VLLM" = "1" ]; then \
+        git clone ${VLLM_REPO} \
+     && cd vllm \
+     && git checkout ${VLLM_BRANCH} \
+     && python -m pip install -r requirements/rocm.txt \
+     && python setup.py clean --all \
+     && python setup.py develop; \
+    fi
+
+# -----------------------
+# Build SGLang
+ARG BUILD_TYPE=all
 
 RUN pip install IPython \
     && pip install orjson \
@@ -48,18 +98,28 @@ RUN pip install IPython \
     && pip install torchao \
     && pip install pybind11
 
-RUN pip uninstall -y triton
-RUN git clone ${TRITON_REPO} \
-    && cd triton \
-    && git checkout ${TRITON_COMMIT} \
-    && cd python \
-    && python3 setup.py install
+RUN pip uninstall -y sgl_kernel sglang
+RUN git clone ${SGL_REPO} \
+    && cd sglang \
+    && if [ "${SGL_BRANCH}" = "${SGL_DEFAULT}" ]; then \
+         echo "Using ${SGL_DEFAULT}, default branch."; \
+         git checkout ${SGL_DEFAULT}; \
+       else \
+         echo "Using ${SGL_BRANCH} branch."; \
+         git checkout ${SGL_BRANCH}; \
+       fi \
+    && cd sgl-kernel \
+    && rm -f pyproject.toml \
+    && mv pyproject_rocm.toml pyproject.toml \
+    && AMDGPU_TARGET=$GPU_ARCH_LIST python setup_rocm.py install \
+    && cd .. \
+    && if [ "$BUILD_TYPE" = "srt" ]; then \
+         python -m pip --no-cache-dir install -e "python[srt_hip]"; \
+       else \
+         python -m pip --no-cache-dir install -e "python[all_hip]"; \
+       fi
 
-RUN git clone ${AITER_REPO} \
-    && cd aiter \
-    && git checkout ${AITER_COMMIT} \
-    && git submodule update --init --recursive \
-    && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop
+RUN python -m pip cache purge
 
 # Copy config files to support MI300X in virtualized environments (MI300X_VF). Symlinks will not be created in image build.
 RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
@@ -67,13 +127,13 @@ RUN find /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
          -type f -name '*MI300X*' | xargs -I {} sh -c 'vf_config=$(echo "$1" | sed "s/MI300X/MI300X_VF/"); cp "$1" "$vf_config"' -- {}
 
 # Performance environment variable.
-
 ENV HIP_FORCE_DEV_KERNARG=1
 ENV HSA_NO_SCRATCH_RECLAIM=1
 ENV SGLANG_SET_CPU_AFFINITY=1
 ENV SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1
 ENV NCCL_MIN_NCHANNELS=112
 
+ENV SGLANG_USE_AITER=1
 ENV SGLANG_MOE_PADDING=1
 ENV VLLM_FP8_PADDING=1
 ENV VLLM_FP8_ACT_PADDING=1
diff --git a/python/pyproject.toml b/python/pyproject.toml
index eb0f1b8c5..86467457a 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -79,7 +79,6 @@ blackwell = [
 srt_hip = [
     "sglang[runtime_common]",
     "torch",
-    "vllm==0.6.7.dev2",
 ]
 
 # xpu is not enabled in public vllm and torch whl,
diff --git a/python/sglang/srt/layers/quantization/fp8_utils.py b/python/sglang/srt/layers/quantization/fp8_utils.py
index 405351818..3ab8634ac 100644
--- a/python/sglang/srt/layers/quantization/fp8_utils.py
+++ b/python/sglang/srt/layers/quantization/fp8_utils.py
@@ -42,7 +42,7 @@ _use_aiter = get_bool_env_var("SGLANG_USE_AITER") and _is_hip
 
 if _use_aiter:
     import aiter
-    from aiter import gemm_a8w8_blockscale_CK, get_hip_quant
+    from aiter import gemm_a8w8_blockscale, get_hip_quant
 
     aiter_per1x128_quant = get_hip_quant(aiter.QuantType.per_1x128)
 
@@ -274,7 +274,7 @@ def aiter_w8a8_block_fp8_linear(
     output_shape = [*input.shape[:-1], weight.shape[0]]
 
     q_input, x_scale = aiter_per1x128_quant(input_2d, quant_dtype=aiter.dtypes.fp8)
-    output = gemm_a8w8_blockscale_CK(
+    output = gemm_a8w8_blockscale(
         q_input, weight, x_scale, weight_scale, dtype=input.dtype
     )
 