Update CI workflows (#1210)

This commit is contained in:
Lianmin Zheng
2024-08-25 16:43:07 -07:00
committed by GitHub
parent 308d024092
commit 15f1a49d2d
8 changed files with 43 additions and 48 deletions

View File

@@ -20,7 +20,7 @@ concurrency:
jobs:
accuracy-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: accuracy
runs-on: accuracy-test
steps:
- name: Checkout code
@@ -28,9 +28,6 @@ jobs:
- name: Install dependencies
run: |
source $HOME/venv/bin/activate
echo "$HOME/venv/bin" >> $GITHUB_PATH
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -40,7 +37,7 @@ jobs:
pip install -e .
- name: Evaluate Accuracy
timeout-minutes: 20
run: |
cd test/srt
python3 test_eval_accuracy_large.py
timeout-minutes: 20

View File

@@ -20,7 +20,7 @@ concurrency:
jobs:
e2e-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: e2e
runs-on: e2e-test
steps:
- name: Checkout code
@@ -28,27 +28,24 @@ jobs:
- name: Install dependencies
run: |
source $HOME/venv/bin/activate
echo "$HOME/venv/bin" >> $GITHUB_PATH
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
- name: Benchmark Serving Throughput
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
timeout-minutes: 10
- name: Benchmark Serving Throughput (w/o RadixAttention)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
timeout-minutes: 10
- name: Benchmark Serving Throughput (w/o ChunkedPrefill)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
timeout-minutes: 10

View File

@@ -18,30 +18,28 @@ concurrency:
cancel-in-progress: true
jobs:
moe-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: accuracy
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Install dependencies
run: |
source $HOME/venv/bin/activate
echo "$HOME/venv/bin" >> $GITHUB_PATH
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
moe-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: moe-test
- name: Benchmark MOE Serving Throughput
uses: nick-fields/retry@v3
with:
timeout_minutes: 15
max_attempts: 2
retry_on: error
command: |
cd test/srt
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Install dependencies
run: |
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
- name: Benchmark MoE Serving Throughput
  # Step-level time limit. GitHub Actions spells this key with a hyphen;
  # the underscore form is only an input of the nick-fields/retry action.
  timeout-minutes: 10
  run: |
    cd test/srt
    python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
- name: Benchmark MoE Serving Throughput (w/o RadixAttention)
  # Step-level time limit. GitHub Actions spells this key with a hyphen;
  # the underscore form is only an input of the nick-fields/retry action.
  timeout-minutes: 10
  run: |
    cd test/srt
    python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache

View File

@@ -20,7 +20,7 @@ concurrency:
jobs:
unit-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: unit
runs-on: unit-test
steps:
- name: Checkout code
@@ -28,9 +28,6 @@ jobs:
- name: Install dependencies
run: |
source $HOME/venv/bin/activate
echo "$HOME/venv/bin" >> $GITHUB_PATH
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -38,13 +35,13 @@ jobs:
pip install sentence_transformers
- name: Test Backend Runtime
timeout-minutes: 20
run: |
cd test/srt
python3 run_suite.py --suite minimal
timeout-minutes: 20
- name: Test Frontend Language
timeout-minutes: 10
run: |
cd test/lang
python3 run_suite.py --suite minimal
timeout-minutes: 10