[CI] Add more multi-gpu tests (#1280)

2024-09-01 00:27:25 -07:00
parent d134c139a1
commit 1b5d56f7f8
11 changed files with 271 additions and 128 deletions
--- a/.github/workflows/accuracy-test.yml
+++ b/.github/workflows/accuracy-test.yml
@@ -18,7 +18,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  accuracy-test:
+  one-gpu:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 1-gpu-runner

@@ -41,3 +41,34 @@ jobs:
        run: |
          cd test/srt
          python3 test_eval_accuracy_large.py
+
+  two-gpu:  # runs test_moe_eval_accuracy_large.py on the 2-gpu-runner label; same `if` gate as one-gpu
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    runs-on: 2-gpu-runner  # runner label; presumably a self-hosted machine with two GPUs - confirm
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3  # NOTE(review): newer major releases of actions/checkout exist; consider upgrading
+
+      - name: Install dependencies  # editable install of ./python, flashinfer from its own index, then human-eval from source
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[all]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+
+          git clone https://github.com/merrymercy/human-eval.git
+          cd human-eval
+          pip install -e .
+
+      - name: Evaluate Accuracy
+        timeout-minutes: 20  # the step is terminated if it runs longer than 20 minutes
+        run: |
+          cd test/srt
+          python3 test_moe_eval_accuracy_large.py
+
+  finish:  # no-op job that depends on every test job in this workflow
+    needs: [one-gpu, two-gpu]  # starts only after both succeed; skipped if either fails or is skipped
+    runs-on: ubuntu-latest
+    steps:
+      - name: Finish
+        run: echo "This is an empty step to ensure that all jobs are completed."
--- a/.github/workflows/cache-purge.yml
+++ b/.github/workflows/cache-purge.yml
@@ -1,27 +0,0 @@
-name: Weekly Cache Purge
-
-on:
-  schedule:
-    - cron: '0 0 * * 0' # Every Sunday at 00:00
-  workflow_dispatch:
-
-jobs:
-  purge-cache:
-    if: github.repository == 'sgl-project/sglang'
-    runs-on: self-hosted
-
-    steps:
-    - name: Checkout code
-      uses: actions/checkout@v3
-
-    - name: Purge pip cache
-      run: |
-        source $HOME/venv/bin/activate
-        echo "$HOME/venv/bin" >> $GITHUB_PATH
-        pip cache purge
-
-    - name: Update dependencies
-      run: |
-        pip install --upgrade pip
-        pip install -e "python[all]"
-        pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
--- a/.github/workflows/e2e-test.yml
+++ b/.github/workflows/e2e-test.yml
@@ -18,7 +18,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  e2e-test:
+  one-gpu:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 1-gpu-runner

@@ -41,7 +41,8 @@ jobs:
      - name: Benchmark Serving Latency
        timeout-minutes: 10
        run: |
-          python3 -m sglang.bench_latency --model meta-llama/Meta-Llama-3.1-8B-Instruct --batch-size 1 --input 128 --output 8
+          cd test/srt
+          python3 -m unittest test_serving_latency.TestServingLatency.test_default

      - name: Benchmark Serving Throughput (w/o RadixAttention)
        timeout-minutes: 10
@@ -54,3 +55,42 @@ jobs:
        run: |
          cd test/srt
          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
+
+  two-gpu:  # runs the "(TP=2)" benchmark steps below on the 2-gpu-runner label; same `if` gate as one-gpu
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    runs-on: 2-gpu-runner  # runner label; presumably a self-hosted machine with two GPUs - confirm
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3  # NOTE(review): newer major releases of actions/checkout exist; consider upgrading
+
+      - name: Install dependencies  # editable install of ./python, then flashinfer from its own index
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[all]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+
+      - name: Benchmark Serving Throughput (TP=2)  # same unittest target the removed moe-test.yml ran
+        timeout-minutes: 10  # the step is terminated after 10 minutes; every benchmark step here uses this limit
+        run: |
+          cd test/srt
+          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
+
+      - name: Benchmark Serving Latency (TP=2)  # latency benchmark; moe-test.yml had no equivalent step
+        timeout-minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_moe_serving_latency.TestServingLatency.test_default
+
+      - name: Benchmark Serving Throughput (w/o RadixAttention) (TP=2)  # same unittest target the removed moe-test.yml ran
+        timeout-minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
+
+  finish:  # no-op job that depends on every benchmark job in this workflow
+    needs: [one-gpu, two-gpu]  # starts only after both succeed; skipped if either fails or is skipped
+    runs-on: ubuntu-latest
+    steps:
+      - name: Finish
+        run: echo "This is an empty step to ensure that all jobs are completed."
--- a/.github/workflows/moe-test.yml
+++ b/.github/workflows/moe-test.yml
@@ -1,45 +0,0 @@
-name: MoE Test
-
-on:
-  push:
-    branches: [ main ]
-    paths:
-      - "python/sglang/**"
-      - "test/**"
-  pull_request:
-    branches: [ main ]
-    paths:
-      - "python/sglang/**"
-      - "test/**"
-  workflow_dispatch:
-
-concurrency:
-  group: moe-test-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  moe-test:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: 2-gpu-runner
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - name: Install dependencies
-        run: |
-          pip install --upgrade pip
-          pip install -e "python[all]"
-          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
-
-      - name: Benchmark MoE Serving Throughput
-        timeout-minutes: 10
-        run: |
-          cd test/srt
-          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
-
-      - name: Benchmark MoE Serving Throughput (w/o RadixAttention)
-        timeout-minutes: 10
-        run: |
-          cd test/srt
-          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
--- a/.github/workflows/unit-test.yml
+++ b/.github/workflows/unit-test.yml
@@ -18,7 +18,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  unit-test-jobs:
+  run-test:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 1-gpu-runner
    strategy:
@@ -48,9 +48,9 @@ jobs:
            python3 run_suite.py --suite minimal --range-begin 8
          fi

-  unit-test:
-    needs: unit-test-jobs
+  finish:  # no-op job that depends on run-test
+    needs: [run-test]  # starts only after run-test succeeds; skipped if it fails or is skipped
     runs-on: ubuntu-latest
     steps:
-      - name: Merge step
-        run: echo "This is an empty merge step"
+      - name: Finish
+        run: echo "This is an empty step to ensure that all jobs are completed."