Add tests to AMD CI for MI35x (#9662)
Co-authored-by: Sai Enduri <saimanas.enduri@amd.com>
This commit is contained in:
65
.github/workflows/pr-test-amd.yml
vendored
65
.github/workflows/pr-test-amd.yml
vendored
@@ -28,6 +28,7 @@ jobs:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
|
||||
runs-on: ${{matrix.runner}}
|
||||
@@ -54,8 +55,9 @@ jobs:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
|
||||
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2, linux-mi35x-gpu-2]
|
||||
runs-on: ${{matrix.runner}}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@@ -70,7 +72,7 @@ jobs:
|
||||
run: bash scripts/ci/amd_ci_install_dependency.sh
|
||||
|
||||
- name: Evaluate accuracy (TP=2)
|
||||
timeout-minutes: 30
|
||||
timeout-minutes: 60
|
||||
run: |
|
||||
bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py
|
||||
|
||||
@@ -78,6 +80,7 @@ jobs:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
|
||||
runs-on: ${{matrix.runner}}
|
||||
@@ -102,6 +105,7 @@ jobs:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
|
||||
runs-on: ${{matrix.runner}}
|
||||
@@ -142,6 +146,7 @@ jobs:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
|
||||
runs-on: ${{matrix.runner}}
|
||||
@@ -176,6 +181,7 @@ jobs:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
|
||||
runs-on: ${{matrix.runner}}
|
||||
@@ -242,10 +248,36 @@ jobs:
|
||||
run: |
|
||||
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 8
|
||||
|
||||
unit-test-backend-1-gpu-amd-mi35x:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: [linux-mi35x-gpu-1]
|
||||
runs-on: ${{matrix.runner}}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Start CI container
|
||||
run: bash scripts/ci/amd_ci_start_container.sh
|
||||
env:
|
||||
GITHUB_WORKSPACE: ${{ github.workspace }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash scripts/ci/amd_ci_install_dependency.sh
|
||||
|
||||
- name: Run test
|
||||
timeout-minutes: 50
|
||||
run: |
|
||||
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd-mi35x
|
||||
|
||||
unit-test-backend-2-gpu-amd:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
|
||||
runs-on: ${{matrix.runner}}
|
||||
@@ -270,6 +302,7 @@ jobs:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: [linux-mi300-gpu-8]
|
||||
runs-on: ${{matrix.runner}}
|
||||
@@ -290,30 +323,6 @@ jobs:
|
||||
run: |
|
||||
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600
|
||||
|
||||
unit-test-backend-8-gpu-CAR-amd:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
strategy:
|
||||
matrix:
|
||||
runner: [linux-mi300-gpu-8]
|
||||
runs-on: ${{matrix.runner}}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Start CI container
|
||||
run: bash scripts/ci/amd_ci_start_container.sh
|
||||
env:
|
||||
GITHUB_WORKSPACE: ${{ github.workspace }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash scripts/ci/amd_ci_install_dependency.sh
|
||||
|
||||
- name: Run CustomAllReduce test
|
||||
timeout-minutes: 20
|
||||
run: |
|
||||
bash scripts/ci/amd_ci_exec.sh -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m unittest test_custom_allreduce.TestCustomAllReduce
|
||||
|
||||
unit-test-sgl-kernel-amd:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
@@ -350,8 +359,8 @@ jobs:
|
||||
needs: [
|
||||
accuracy-test-1-gpu-amd, mla-test-1-gpu-amd, bench-test-2-gpu-amd,
|
||||
accuracy-test-2-gpu-amd, performance-test-1-gpu-part-1-amd, performance-test-1-gpu-part-2-amd,
|
||||
unit-test-backend-1-gpu-amd, unit-test-backend-2-gpu-amd, unit-test-backend-8-gpu-amd,
|
||||
unit-test-sgl-kernel-amd
|
||||
unit-test-backend-1-gpu-amd, unit-test-backend-1-gpu-amd-mi35x, unit-test-backend-2-gpu-amd,
|
||||
unit-test-backend-8-gpu-amd, unit-test-sgl-kernel-amd
|
||||
]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
||||
Reference in New Issue
Block a user