From 4b7034ddb0c8cd8069e79a0dbbd9b6a773bac950 Mon Sep 17 00:00:00 2001
From: sogalin <39478626+sogalin@users.noreply.github.com>
Date: Thu, 28 Aug 2025 22:24:34 -0700
Subject: [PATCH] ROCm 7.0 update (#9757)

---
Reviewer notes (text in this region is ignored when the patch is applied):

* The line structure of this patch was rebuilt from a whitespace-collapsed
  copy. Hunk line counts were checked against the @@ headers and the
  diffstat, but indentation inside context lines is a best-effort guess;
  if the context does not match, apply with `git apply --ignore-whitespace`.
* llvm hunk: the added line used the Dockerfile instruction `ENV` inside a
  shell-form RUN, where it would be executed as a (non-existent) shell
  command and was not chained to the following `git clone`. It is now
  `export HIP_CLANG_PATH=... && \`. The export is only visible for the rest
  of that RUN step; a stage that sets BUILD_LLVM="1" and needs the variable
  in later steps or at runtime must also declare
  `ENV HIP_CLANG_PATH=...` at stage level.
* The header comment of the new gfx942-rocm700 stage now says "with rocm700"
  so it can be told apart from the rocm630 stage.
* Because of these two content changes the resulting file no longer matches
  blob 0af8eea03 named on the index line.

 docker/Dockerfile.rocm | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm
index c10ee6f1d..0af8eea03 100644
--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@@ -1,16 +1,19 @@
 # Usage (to build SGLang ROCm docker image):
-# docker build --build-arg SGL_BRANCH=v0.4.9.post1 --build-arg GPU_ARCH=gfx942 -t v0.4.9.post1-rocm630-mi30x -f Dockerfile.rocm .
-# docker build --build-arg SGL_BRANCH=v0.4.9.post1 --build-arg GPU_ARCH=gfx950 -t v0.4.9.post1-rocm700-mi35x -f Dockerfile.rocm .
+# docker build --build-arg SGL_BRANCH=v0.5.1.post3 --build-arg GPU_ARCH=gfx942 -t v0.5.1.post3-rocm630-mi30x -f Dockerfile.rocm .
+# docker build --build-arg SGL_BRANCH=v0.5.1.post3 --build-arg GPU_ARCH=gfx942-rocm700 -t v0.5.1.post3-rocm700-mi30x -f Dockerfile.rocm .
+# docker build --build-arg SGL_BRANCH=v0.5.1.post3 --build-arg GPU_ARCH=gfx950 -t v0.5.1.post3-rocm700-mi35x -f Dockerfile.rocm .
 
+
 # Default base images
-ARG BASE_IMAGE_950="rocm/sgl-dev:rocm7.0_preview_ubuntu_22.04_vllm_0.9.2_mi35X_prealpha"
 ARG BASE_IMAGE_942="rocm/sgl-dev:vllm20250114"
+ARG BASE_IMAGE_942_ROCM700="rocm/sgl-dev:rocm7-vllm-20250821"
+ARG BASE_IMAGE_950="rocm/sgl-dev:rocm7-vllm-20250821"
 
 # This is necessary for scope purpose
 ARG GPU_ARCH=gfx950
 
 # ===============================
-# Base image 942 and args
+# Base image 942 with rocm630 and args
 FROM $BASE_IMAGE_942 AS gfx942
 ENV BUILD_VLLM="0"
 ENV BUILD_TRITON="1"
@@ -20,16 +23,26 @@ ENV BUILD_MOONCAKE="1"
 ENV AITER_COMMIT="v0.1.4"
 ENV NO_DEPS_FLAG=""
 
+# ===============================
+# Base image 942 with rocm700 and args
+FROM $BASE_IMAGE_942_ROCM700 AS gfx942-rocm700
+ENV BUILD_VLLM="0"
+ENV BUILD_TRITON="0"
+ENV BUILD_LLVM="0"
+ENV BUILD_AITER_ALL="1"
+ENV BUILD_MOONCAKE="1"
+ENV AITER_COMMIT="v0.1.5"
+ENV NO_DEPS_FLAG=""
+
 # ===============================
 # Base image 950 and args
 FROM $BASE_IMAGE_950 AS gfx950
 ENV BUILD_VLLM="0"
 ENV BUILD_TRITON="0"
-ENV BUILD_LLVM="1"
+ENV BUILD_LLVM="0"
 ENV BUILD_AITER_ALL="1"
-ENV BUILD_MOONCAKE="0"
-ENV AITER_COMMIT="v0.1.4"
-ENV HIP_CLANG_PATH="/sgl-workspace/llvm-project/build/bin/"
+ENV BUILD_MOONCAKE="1"
+ENV AITER_COMMIT="v0.1.5"
 ENV NO_DEPS_FLAG="--no-deps"
 
 # ===============================
@@ -38,7 +51,7 @@ FROM ${GPU_ARCH}
 
 # This is necessary for scope purpose, again
 ARG GPU_ARCH=gfx950
-ENV GPU_ARCH_LIST=${GPU_ARCH:-${PYTORCH_ROCM_ARCH}}
+ENV GPU_ARCH_LIST=${GPU_ARCH%-*}
 
 ARG SGL_REPO="https://github.com/sgl-project/sglang.git"
 ARG SGL_DEFAULT="main"
@@ -54,7 +67,7 @@ ARG LLVM_BRANCH="MainOpSelV2"
 ARG LLVM_COMMIT="6520ace8227ffe2728148d5f3b9872a870b0a560"
 
 ARG MOONCAKE_REPO="https://github.com/kvcache-ai/Mooncake.git"
-ARG MOONCAKE_COMMIT="b63322c9e8d11e9d40a2b4ce9ccbc9c12e82af2a"
+ARG MOONCAKE_COMMIT="dcdf1c784b40aa6975a8ed89fe26321b028e40e8"
 
 USER root
 
@@ -67,6 +80,7 @@ WORKDIR /sgl-workspace
 # -----------------------
 # llvm
 RUN if [ "$BUILD_LLVM" = "1" ]; then \
+     export HIP_CLANG_PATH="/sgl-workspace/llvm-project/build/bin/" && \
      git clone --single-branch ${LLVM_REPO} -b ${LLVM_BRANCH} \
      && cd llvm-project \
      && git checkout ${LLVM_COMMIT} \
@@ -126,8 +140,6 @@ RUN if [ "$BUILD_MOONCAKE" = "1" ]; then \
      apt update && apt install -y zip unzip wget && \
      apt install -y gcc make libtool autoconf librdmacm-dev rdmacm-utils infiniband-diags ibverbs-utils perftest ethtool libibverbs-dev rdma-core && \
      apt install -y openssh-server openmpi-bin openmpi-common libopenmpi-dev && \
-     wget https://dl.google.com/go/go1.24.3.linux-amd64.tar.gz && \
-     rm -rf /usr/local/go && tar -C /usr/local -xzf go1.24.3.linux-amd64.tar.gz && \
      git clone ${MOONCAKE_REPO} && \
      cd Mooncake && \
      git checkout ${MOONCAKE_COMMIT} && \
@@ -151,7 +163,7 @@ ARG BUILD_TYPE=all
 RUN pip install IPython \
     && pip install orjson \
     && pip install python-multipart \
-    && pip install torchao \
+    && pip install torchao==0.9.0 \
     && pip install pybind11
 
 RUN pip uninstall -y sgl_kernel sglang