[CI] Add more multi-gpu tests (#1280)

This commit is contained in:
Lianmin Zheng
2024-09-01 00:27:25 -07:00
committed by GitHub
parent d134c139a1
commit 1b5d56f7f8
11 changed files with 271 additions and 128 deletions

View File

@@ -18,7 +18,7 @@ concurrency:
cancel-in-progress: true
jobs:
accuracy-test:
one-gpu:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: 1-gpu-runner
@@ -41,3 +41,34 @@ jobs:
run: |
cd test/srt
python3 test_eval_accuracy_large.py
two-gpu:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: 2-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Install dependencies
run: |
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
git clone https://github.com/merrymercy/human-eval.git
cd human-eval
pip install -e .
- name: Evaluate Accuracy
timeout-minutes: 20
run: |
cd test/srt
python3 test_moe_eval_accuracy_large.py
finish:
needs: [one-gpu, two-gpu]
runs-on: ubuntu-latest
steps:
- name: Finish
run: echo "This is an empty step to ensure that all jobs are completed."

View File

@@ -1,27 +0,0 @@
name: Weekly Cache Purge
on:
schedule:
- cron: '0 0 * * 0' # Every Sunday at 00:00
workflow_dispatch:
jobs:
purge-cache:
if: github.repository == 'sgl-project/sglang'
runs-on: self-hosted
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Purge pip cache
run: |
source $HOME/venv/bin/activate
echo "$HOME/venv/bin" >> $GITHUB_PATH
pip cache purge
- name: Update dependencies
run: |
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall

View File

@@ -18,7 +18,7 @@ concurrency:
cancel-in-progress: true
jobs:
e2e-test:
one-gpu:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: 1-gpu-runner
@@ -41,7 +41,8 @@ jobs:
- name: Benchmark Serving Latency
timeout-minutes: 10
run: |
python3 -m sglang.bench_latency --model meta-llama/Meta-Llama-3.1-8B-Instruct --batch-size 1 --input 128 --output 8
cd test/srt
python3 -m unittest test_serving_latency.TestServingLatency.test_default
- name: Benchmark Serving Throughput (w/o RadixAttention)
timeout-minutes: 10
@@ -54,3 +55,42 @@ jobs:
run: |
cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
two-gpu:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: 2-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Install dependencies
run: |
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
- name: Benchmark Serving Throughput (TP=2)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
- name: Benchmark Serving Latency (TP=2)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_moe_serving_latency.TestServingLatency.test_default
- name: Benchmark Serving Throughput (w/o RadixAttention) (TP=2)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
finish:
needs: [one-gpu, two-gpu]
runs-on: ubuntu-latest
steps:
- name: Finish
run: echo "This is an empty step to ensure that all jobs are completed."

View File

@@ -1,45 +0,0 @@
name: MoE Test
on:
push:
branches: [ main ]
paths:
- "python/sglang/**"
- "test/**"
pull_request:
branches: [ main ]
paths:
- "python/sglang/**"
- "test/**"
workflow_dispatch:
concurrency:
group: moe-test-${{ github.ref }}
cancel-in-progress: true
jobs:
moe-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: 2-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Install dependencies
run: |
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
- name: Benchmark MoE Serving Throughput
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
- name: Benchmark MoE Serving Throughput (w/o RadixAttention)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache

View File

@@ -18,7 +18,7 @@ concurrency:
cancel-in-progress: true
jobs:
unit-test-jobs:
run-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: 1-gpu-runner
strategy:
@@ -48,9 +48,9 @@ jobs:
python3 run_suite.py --suite minimal --range-begin 8
fi
unit-test:
needs: unit-test-jobs
finish:
needs: [run-test]
runs-on: ubuntu-latest
steps:
- name: Merge step
run: echo "This is an empty merge step"
- name: Finish
run: echo "This is an empty step to ensure that all jobs are completed."