From 91b3555d2d29d33460466f41d2a6919795406f8e Mon Sep 17 00:00:00 2001 From: Hubert Lu <55214931+hubertlu-tw@users.noreply.github.com> Date: Wed, 10 Sep 2025 12:50:05 -0700 Subject: [PATCH] Add tests to AMD CI for MI35x (#9662) Co-authored-by: Sai Enduri --- .github/workflows/pr-test-amd.yml | 65 ++++++----- python/sglang/srt/models/deepseek_v2.py | 11 +- scripts/ci/amd_ci_exec.sh | 17 +++ scripts/ci/amd_ci_install_dependency.sh | 35 +++++- scripts/ci/amd_ci_start_container.sh | 142 ++++++++++-------------- test/srt/run_suite.py | 4 + test/srt/test_gpt_oss_common.py | 7 +- 7 files changed, 159 insertions(+), 122 deletions(-) diff --git a/.github/workflows/pr-test-amd.yml b/.github/workflows/pr-test-amd.yml index 856f9f56f..2c7e2c652 100644 --- a/.github/workflows/pr-test-amd.yml +++ b/.github/workflows/pr-test-amd.yml @@ -28,6 +28,7 @@ jobs: if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false strategy: + fail-fast: false matrix: runner: [linux-mi300-gpu-1, linux-mi325-gpu-1] runs-on: ${{matrix.runner}} @@ -54,8 +55,9 @@ jobs: if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false strategy: + fail-fast: false matrix: - runner: [linux-mi300-gpu-2, linux-mi325-gpu-2] + runner: [linux-mi300-gpu-2, linux-mi325-gpu-2, linux-mi35x-gpu-2] runs-on: ${{matrix.runner}} steps: - name: Checkout code @@ -70,7 +72,7 @@ jobs: run: bash scripts/ci/amd_ci_install_dependency.sh - name: Evaluate accuracy (TP=2) - timeout-minutes: 30 + timeout-minutes: 60 run: | bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py @@ -78,6 +80,7 @@ jobs: if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false strategy: + fail-fast: false matrix: runner: [linux-mi300-gpu-1, linux-mi325-gpu-1] runs-on: ${{matrix.runner}} @@ -102,6 +105,7 @@ jobs: if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false strategy: + fail-fast: false matrix: runner: [linux-mi300-gpu-1, linux-mi325-gpu-1] runs-on: ${{matrix.runner}} @@ -142,6 +146,7 @@ jobs: if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false strategy: + fail-fast: false matrix: runner: [linux-mi300-gpu-1, linux-mi325-gpu-1] runs-on: ${{matrix.runner}} @@ -176,6 +181,7 @@ jobs: if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false strategy: + fail-fast: false matrix: runner: [linux-mi300-gpu-2, linux-mi325-gpu-2] runs-on: ${{matrix.runner}} @@ -242,10 +248,36 @@ jobs: run: | bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 8 + unit-test-backend-1-gpu-amd-mi35x: + if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && + github.event.pull_request.draft == false + strategy: + fail-fast: false + matrix: + runner: [linux-mi35x-gpu-1] + runs-on: ${{matrix.runner}} + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Start CI container + run: bash scripts/ci/amd_ci_start_container.sh + env: + GITHUB_WORKSPACE: ${{ github.workspace }} + + - name: Install dependencies + run: bash scripts/ci/amd_ci_install_dependency.sh + + - name: Run test + timeout-minutes: 50 + run: | + bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd-mi35x + unit-test-backend-2-gpu-amd: if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false strategy: + fail-fast: false matrix: runner: [linux-mi300-gpu-2, linux-mi325-gpu-2] runs-on: ${{matrix.runner}} @@ -270,6 +302,7 @@ jobs: if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false strategy: + fail-fast: false matrix: runner: [linux-mi300-gpu-8] runs-on: ${{matrix.runner}} @@ -290,30 +323,6 @@ jobs: run: | bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600 - unit-test-backend-8-gpu-CAR-amd: - if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && - github.event.pull_request.draft == false - strategy: - matrix: - runner: [linux-mi300-gpu-8] - runs-on: ${{matrix.runner}} - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Start CI container - run: bash scripts/ci/amd_ci_start_container.sh - env: - GITHUB_WORKSPACE: ${{ github.workspace }} - - - name: Install dependencies - run: bash scripts/ci/amd_ci_install_dependency.sh - - - name: Run CustomAllReduce test - timeout-minutes: 20 - run: | - bash scripts/ci/amd_ci_exec.sh -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m unittest test_custom_allreduce.TestCustomAllReduce - unit-test-sgl-kernel-amd: if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false @@ -350,8 +359,8 @@ jobs: needs: [ accuracy-test-1-gpu-amd, mla-test-1-gpu-amd, bench-test-2-gpu-amd, accuracy-test-2-gpu-amd, performance-test-1-gpu-part-1-amd, performance-test-1-gpu-part-2-amd, - unit-test-backend-1-gpu-amd, unit-test-backend-2-gpu-amd, unit-test-backend-8-gpu-amd, - unit-test-sgl-kernel-amd + unit-test-backend-1-gpu-amd, unit-test-backend-1-gpu-amd-mi35x, unit-test-backend-2-gpu-amd, + unit-test-backend-8-gpu-amd, unit-test-sgl-kernel-amd ] runs-on: ubuntu-latest steps: diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 168ad9f29..b5535f6d3 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -2027,7 +2027,10 @@ class DeepseekV2DecoderLayer(nn.Module): quant_format = ( "mxfp4" if _is_gfx95_supported - and self.self_attn.fused_qkv_a_proj_with_mqa.weight == torch.uint8 + and getattr(self.self_attn, "fused_qkv_a_proj_with_mqa", None) is not None + and getattr(self.self_attn.fused_qkv_a_proj_with_mqa, "weight", None) + is not None + and self.self_attn.fused_qkv_a_proj_with_mqa.weight.dtype == torch.uint8 else "" ) @@ -2582,7 +2585,11 @@ class DeepseekV2ForCausalLM(nn.Module): 0, (-1, self_attn.qk_nope_head_dim + self_attn.v_head_dim) ).split([self_attn.qk_nope_head_dim, self_attn.v_head_dim], dim=1) - if _use_aiter_gfx95 and self.quant_config.get_name() == "quark": + if ( + _use_aiter_gfx95 + and self.quant_config is not None + and self.quant_config.get_name() == "quark" + ): w_kc, self_attn.w_scale_k, w_vc, self_attn.w_scale_v = ( quark_post_load_weights(self_attn, w, "mxfp4") ) diff --git a/scripts/ci/amd_ci_exec.sh b/scripts/ci/amd_ci_exec.sh index 411fe2a75..3bd940eb1 100755 --- a/scripts/ci/amd_ci_exec.sh +++ b/scripts/ci/amd_ci_exec.sh @@ -1,6 +1,18 @@ #!/bin/bash set -euo pipefail +# Detect GPU family from hostname (e.g., linux-mi35x-gpu-1-xxxxx-runner-zzzzz) +HOSTNAME_VALUE=$(hostname) +GPU_FAMILY="" + +# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz +if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then + GPU_FAMILY="${BASH_REMATCH[1]}" + echo "Detected GPU family from hostname: ${GPU_FAMILY}" +else + echo "Warning: could not parse GPU family from '${HOSTNAME_VALUE}'" +fi + WORKDIR="/sglang-checkout/test/srt" declare -A ENV_MAP=( [SGLANG_AMD_CI]=1 @@ -8,6 +20,11 @@ declare -A ENV_MAP=( [SGLANG_USE_AITER]=1 ) +# Conditionally add GPU_ARCHS only for mi35x +if [[ "${GPU_FAMILY}" == "mi35x" ]]; then + ENV_MAP[GPU_ARCHS]="gfx950" +fi + # Parse -w/--workdir and -e ENV=VAL while [[ $# -gt 0 ]]; do case "$1" in diff --git a/scripts/ci/amd_ci_install_dependency.sh b/scripts/ci/amd_ci_install_dependency.sh index 3c8061351..518f0dde9 100755 --- a/scripts/ci/amd_ci_install_dependency.sh +++ b/scripts/ci/amd_ci_install_dependency.sh @@ -1,19 +1,44 @@ #!/bin/bash set -euo pipefail +HOSTNAME_VALUE=$(hostname) +GPU_ARCH="mi30x" # default + +# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz +if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then + GPU_ARCH="${BASH_REMATCH[1]}" + echo "Detected GPU architecture from hostname: ${GPU_ARCH}" +else + echo "Warning: could not parse GPU architecture from '${HOSTNAME_VALUE}', defaulting to ${GPU_ARCH}" +fi # Install the required dependencies in CI. docker exec ci_sglang pip install --upgrade pip docker exec ci_sglang pip uninstall sgl-kernel -y || true docker exec -w /sglang-checkout/sgl-kernel ci_sglang bash -c "rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install" -docker exec ci_sglang pip install -e "python[dev_hip]" + +case "${GPU_ARCH}" in + mi35x) + echo "Runner uses ${GPU_ARCH}; will fetch mi35x image." + docker exec ci_sglang pip install -e "python[dev_hip]" --no-deps # TODO: only for mi35x + # For lmms_evals evaluating MMMU + docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git + docker exec -w /lmms-eval ci_sglang pip install -e . --no-deps # TODO: only for mi35x + ;; + mi30x|mi300|mi325) + echo "Runner uses ${GPU_ARCH}; will fetch mi30x image." + docker exec ci_sglang pip install -e "python[dev_hip]" + # For lmms_evals evaluating MMMU + docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git + docker exec -w /lmms-eval ci_sglang pip install -e . + ;; + *) + echo "Runner architecture '${GPU_ARCH}' unrecognised;" >&2 + ;; +esac docker exec -w / ci_sglang git clone https://github.com/merrymercy/human-eval.git docker exec -w /human-eval ci_sglang pip install -e . -# For lmms_evals evaluating MMMU -docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git -docker exec -w /lmms-eval ci_sglang pip install -e . - docker exec -w / ci_sglang mkdir -p /dummy-grok mkdir -p dummy-grok && wget https://sharkpublic.blob.core.windows.net/sharkpublic/sglang/dummy_grok.json -O dummy-grok/config.json docker cp ./dummy-grok ci_sglang:/ diff --git a/scripts/ci/amd_ci_start_container.sh b/scripts/ci/amd_ci_start_container.sh index 352d96347..a1f281c8d 100755 --- a/scripts/ci/amd_ci_start_container.sh +++ b/scripts/ci/amd_ci_start_container.sh @@ -3,7 +3,7 @@ set -euo pipefail # Get version from SGLang version.py file SGLANG_VERSION_FILE="$(dirname "$0")/../../python/sglang/version.py" -SGLANG_VERSION="v0.5.0rc0" # Default version, will be overridden if version.py is found +SGLANG_VERSION="v0.5.0rc0" # Default version, will be overridden if version.py is found if [ -f "$SGLANG_VERSION_FILE" ]; then VERSION_FROM_FILE=$(python3 -c ' @@ -25,130 +25,102 @@ else echo "Warning: version.py not found, using default version: $SGLANG_VERSION" >&2 fi + # Default base tags (can be overridden by command line arguments) DEFAULT_MI30X_BASE_TAG="${SGLANG_VERSION}-rocm630-mi30x" DEFAULT_MI35X_BASE_TAG="${SGLANG_VERSION}-rocm700-mi35x" # Parse command line arguments -MI30X_BASE_TAG="$DEFAULT_MI30X_BASE_TAG" -MI35X_BASE_TAG="$DEFAULT_MI35X_BASE_TAG" +MI30X_BASE_TAG="${DEFAULT_MI30X_BASE_TAG}" +MI35X_BASE_TAG="${DEFAULT_MI35X_BASE_TAG}" while [[ $# -gt 0 ]]; do case $1 in - --mi30x-base-tag) - MI30X_BASE_TAG="$2" - shift 2 - ;; - --mi35x-base-tag) - MI35X_BASE_TAG="$2" - shift 2 - ;; + --mi30x-base-tag) MI30X_BASE_TAG="$2"; shift 2;; + --mi35x-base-tag) MI35X_BASE_TAG="$2"; shift 2;; -h|--help) echo "Usage: $0 [--mi30x-base-tag TAG] [--mi35x-base-tag TAG]" - echo " --mi30x-base-tag TAG Base tag for mi30x images (default: $DEFAULT_MI30X_BASE_TAG)" - echo " --mi35x-base-tag TAG Base tag for mi35x images (default: $DEFAULT_MI35X_BASE_TAG)" exit 0 ;; - *) - echo "Unknown option $1" - echo "Use --help for usage information" - exit 1 - ;; + *) echo "Unknown option $1"; exit 1;; esac done + + +# Detect GPU architecture from the Kubernetes runner hostname +HOSTNAME_VALUE=$(hostname) +GPU_ARCH="mi30x" # default + +# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz +if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then + GPU_ARCH="${BASH_REMATCH[1]}" + echo "Detected GPU architecture from hostname: ${GPU_ARCH}" +else + echo "Warning: could not parse GPU architecture from '${HOSTNAME_VALUE}', defaulting to ${GPU_ARCH}" +fi + +# Normalise / collapse architectures we don’t yet build specifically for +case "${GPU_ARCH}" in + mi35x) + echo "Runner uses ${GPU_ARCH}; will fetch mi35x image." + ;; + mi30x|mi300|mi325) + echo "Runner uses ${GPU_ARCH}; will fetch mi30x image." + GPU_ARCH="mi30x" + ;; + *) + echo "Runner architecture '${GPU_ARCH}' unrecognised; defaulting to mi30x image." >&2 + GPU_ARCH="mi30x" + ;; +esac + + # Set up DEVICE_FLAG based on Kubernetes pod info -if [ -f "/etc/podinfo/gha-render-devices" ]; then +if [[ -f /etc/podinfo/gha-render-devices ]]; then DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices) else DEVICE_FLAG="--device /dev/dri" fi - -# Function to find latest available image for a given GPU architecture +# Find the latest image find_latest_image() { local gpu_arch=$1 - local base_tag + local base_tag days_back image_tag - if [ "$gpu_arch" == "mi30x" ]; then - base_tag="$MI30X_BASE_TAG" - elif [ "$gpu_arch" == "mi35x" ]; then - base_tag="$MI35X_BASE_TAG" - else - echo "Error: Unsupported GPU architecture '$gpu_arch'" >&2 - return 1 - fi - - local days_back=0 - - while [ $days_back -lt 7 ]; do - local check_date=$(date -d "$days_back days ago" +%Y%m%d) - local image_tag="${base_tag}-${check_date}" + case "${gpu_arch}" in + mi30x) base_tag="${MI30X_BASE_TAG}" ;; + mi35x) base_tag="${MI35X_BASE_TAG}" ;; + *) echo "Error: unsupported GPU architecture '${gpu_arch}'" >&2; return 1 ;; + esac + for days_back in {0..6}; do + image_tag="${base_tag}-$(date -d "${days_back} days ago" +%Y%m%d)" echo "Checking for image: rocm/sgl-dev:${image_tag}" >&2 - - # Check if the image exists by trying to get its manifest if docker manifest inspect "rocm/sgl-dev:${image_tag}" >/dev/null 2>&1; then echo "Found available image: rocm/sgl-dev:${image_tag}" >&2 echo "rocm/sgl-dev:${image_tag}" return 0 fi - - days_back=$((days_back + 1)) done - echo "Error: No ${gpu_arch} image found in the last 7 days for version ${base_tag}" >&2 - - # Final fallback to specific hardcoded images - echo "Using final fallback images..." >&2 - if [ "$gpu_arch" == "mi30x" ]; then - echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812" - elif [ "$gpu_arch" == "mi35x" ]; then + echo "Error: no ${gpu_arch} image found in the last 7 days for base ${base_tag}" >&2 + echo "Using hard-coded fallback…" >&2 + if [[ "${gpu_arch}" == "mi35x" ]]; then echo "rocm/sgl-dev:v0.5.0rc0-rocm700-mi35x-20250812" else - echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812" # Default to mi30x + echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812" fi - - return 0 } -# Determine image finder and fallback based on runner -# In Kubernetes, the hostname contains the GPU type (e.g., linux-mi300-gpu-1-bgg8r-runner-vknlb) -# Extract the GPU type from hostname -HOSTNAME_VALUE=$(hostname) -RUNNER_NAME="unknown" - -if [[ "${HOSTNAME_VALUE}" =~ ^(linux-mi[0-9]+-gpu-[0-9]+) ]]; then - RUNNER_NAME="${BASH_REMATCH[1]}" - echo "Extracted runner from hostname: ${RUNNER_NAME}" -else - echo "Could not extract runner info from hostname: ${HOSTNAME_VALUE}" -fi - -echo "The runner is: ${RUNNER_NAME}" -GPU_ARCH="mi30x" - -# Check for mi350/mi355 runners -if [[ "${RUNNER_NAME}" =~ ^linux-mi350-gpu-[0-9]+$ ]] || [[ "${RUNNER_NAME}" =~ ^linux-mi355-gpu-[0-9]+$ ]]; then - echo "Runner is ${RUNNER_NAME}, will find mi35x image." - GPU_ARCH="mi35x" -# Check for mi300/mi325 runners -elif [[ "${RUNNER_NAME}" =~ ^linux-mi300-gpu-[0-9]+$ ]] || [[ "${RUNNER_NAME}" =~ ^linux-mi325-gpu-[0-9]+$ ]]; then - echo "Runner is ${RUNNER_NAME}, will find mi30x image." -else - echo "Runner type not recognized: '${RUNNER_NAME}'" - echo "Defaulting to find mi30x image" -fi - -# Find and pull the latest image +# Pull and run the latest image IMAGE=$(find_latest_image "${GPU_ARCH}") -echo "Pulling Docker image: $IMAGE" -docker pull "$IMAGE" +echo "Pulling Docker image: ${IMAGE}" +docker pull "${IMAGE}" -# Run the container -echo "Starting container: ci_sglang" -docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \ +echo "Launching container: ci_sglang" +docker run -dt --user root --device=/dev/kfd ${DEVICE_FLAG} \ -v "${GITHUB_WORKSPACE:-$PWD}:/sglang-checkout" \ --ipc=host --group-add video \ --shm-size 32g \ @@ -157,4 +129,4 @@ docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \ --security-opt seccomp=unconfined \ -w /sglang-checkout \ --name ci_sglang \ - "$IMAGE" + "${IMAGE}" diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 28ab321a0..b030db76b 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -243,6 +243,10 @@ suite_amd = { TestFile("test_wave_attention_kernels.py", 2), TestFile("test_wave_attention_backend.py", 150), ], + "per-commit-amd-mi35x": [ + TestFile("test_mla.py", 242), + TestFile("test_gpt_oss_1gpu.py", 600), + ], "per-commit-2-gpu-amd": [ TestFile("lora/test_lora_tp.py", 116), TestFile("rl/test_update_weights_from_distributed.py", 103), diff --git a/test/srt/test_gpt_oss_common.py b/test/srt/test_gpt_oss_common.py index 5f6326b2b..6be739277 100644 --- a/test/srt/test_gpt_oss_common.py +++ b/test/srt/test_gpt_oss_common.py @@ -1,8 +1,9 @@ +import os from concurrent.futures import ThreadPoolExecutor from types import SimpleNamespace from typing import Dict, List, Literal, Optional -from sglang.srt.utils import kill_process_tree +from sglang.srt.utils import is_hip, kill_process_tree from sglang.test.run_eval import run_eval from sglang.test.test_utils import ( DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, @@ -14,6 +15,7 @@ from sglang.test.test_utils import ( ) _base_url = DEFAULT_URL_FOR_TEST +_is_hip = is_hip() class BaseTestGptOss(CustomTestCase): @@ -36,7 +38,8 @@ class BaseTestGptOss(CustomTestCase): if model_variant == "20b": other_args += ["--cuda-graph-max-bs", "600"] - + if _is_hip: + os.environ["SGLANG_USE_AITER"] = "0" self._run_test_raw( model=model, expected_score_of_reasoning_effort=expected_score_of_reasoning_effort,