[CI] Add more multi-gpu tests (#1280)

This commit is contained in:
Lianmin Zheng
2024-09-01 00:27:25 -07:00
committed by GitHub
parent d134c139a1
commit 1b5d56f7f8
11 changed files with 271 additions and 128 deletions

View File

@@ -18,7 +18,7 @@ concurrency:
cancel-in-progress: true
jobs:
-e2e-test:
+one-gpu:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: 1-gpu-runner
@@ -41,7 +41,8 @@ jobs:
- name: Benchmark Serving Latency
timeout-minutes: 10
run: |
-python3 -m sglang.bench_latency --model meta-llama/Meta-Llama-3.1-8B-Instruct --batch-size 1 --input 128 --output 8
+cd test/srt
+python3 -m unittest test_serving_latency.TestServingLatency.test_default
- name: Benchmark Serving Throughput (w/o RadixAttention)
timeout-minutes: 10
@@ -54,3 +55,42 @@ jobs:
run: |
cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
two-gpu:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: 2-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Install dependencies
run: |
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
- name: Benchmark Serving Throughput (TP=2)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
- name: Benchmark Serving Latency (TP=2)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_moe_serving_latency.TestServingLatency.test_default
- name: Benchmark Serving Throughput (w/o RadixAttention) (TP=2)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
finish:
needs: [one-gpu, two-gpu]
runs-on: ubuntu-latest
steps:
- name: Finish
run: echo "This is an empty step to ensure that all jobs are completed."