[CI] Add more multi-gpu tests (#1280)

This commit is contained in:
Lianmin Zheng
2024-09-01 00:27:25 -07:00
committed by GitHub
parent d134c139a1
commit 1b5d56f7f8
11 changed files with 271 additions and 128 deletions

View File

@@ -18,7 +18,7 @@ concurrency:
cancel-in-progress: true
jobs:
-e2e-test:
+one-gpu:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: 1-gpu-runner
@@ -41,7 +41,8 @@ jobs:
- name: Benchmark Serving Latency
timeout-minutes: 10
run: |
-python3 -m sglang.bench_latency --model meta-llama/Meta-Llama-3.1-8B-Instruct --batch-size 1 --input 128 --output 8
+cd test/srt
+python3 -m unittest test_serving_latency.TestServingLatency.test_default
- name: Benchmark Serving Throughput (w/o RadixAttention)
timeout-minutes: 10
@@ -54,3 +55,42 @@ jobs:
run: |
cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
two-gpu:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: 2-gpu-runner
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Install dependencies
run: |
pip install --upgrade pip
pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
- name: Benchmark Serving Throughput (TP=2)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
- name: Benchmark Serving Latency (TP=2)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_moe_serving_latency.TestServingLatency.test_default
- name: Benchmark Serving Throughput (w/o RadixAttention) (TP=2)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
finish:
needs: [one-gpu, two-gpu]
runs-on: ubuntu-latest
steps:
- name: Finish
run: echo "This is an empty step to ensure that all jobs are completed."