[CI] merge all ci tests into one file (#1289)

2024-09-01 02:36:56 -07:00
parent 54772f784a
commit 761b2cebd6
7 changed files with 211 additions and 233 deletions
--- a/.github/workflows/accuracy-test.yml
+++ b/.github/workflows/accuracy-test.yml
@@ -1,74 +0,0 @@
-name: Accuracy Test
-
-on:
-  push:
-    branches: [ main ]
-    paths:
-      - "python/sglang/**"
-      - "test/**"
-  pull_request:
-    branches: [ main ]
-    paths:
-      - "python/sglang/**"
-      - "test/**"
-  workflow_dispatch:
-
-concurrency:
-  group: accuracy-test-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  one-gpu:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: 1-gpu-runner
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - name: Install dependencies
-        run: |
-          pip install --upgrade pip
-          pip install -e "python[all]"
-          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
-
-          git clone https://github.com/merrymercy/human-eval.git
-          cd human-eval
-          pip install -e .
-
-      - name: Evaluate Accuracy
-        timeout-minutes: 20
-        run: |
-          cd test/srt
-          python3 test_eval_accuracy_large.py
-
-  two-gpu:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: 2-gpu-runner
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - name: Install dependencies
-        run: |
-          pip install --upgrade pip
-          pip install -e "python[all]"
-          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
-
-          git clone https://github.com/merrymercy/human-eval.git
-          cd human-eval
-          pip install -e .
-
-      - name: Evaluate Accuracy
-        timeout-minutes: 20
-        run: |
-          cd test/srt
-          python3 test_moe_eval_accuracy_large.py
-
-  finish:
-    needs: [one-gpu, two-gpu]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Finish
-        run: echo "This is an empty step to ensure that all jobs are completed."
--- a/.github/workflows/e2e-test.yml
+++ b/.github/workflows/e2e-test.yml
@@ -1,96 +0,0 @@
-name: E2E Test
-
-on:
-  push:
-    branches: [ main ]
-    paths:
-      - "python/sglang/**"
-      - "test/**"
-  pull_request:
-    branches: [ main ]
-    paths:
-      - "python/sglang/**"
-      - "test/**"
-  workflow_dispatch:
-
-concurrency:
-  group: e2e-test-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  one-gpu:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: 1-gpu-runner
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - name: Install dependencies
-        run: |
-          pip install --upgrade pip
-          pip install -e "python[all]"
-          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
-
-      - name: Benchmark Serving Throughput
-        timeout-minutes: 10
-        run: |
-          cd test/srt
-          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
-
-      - name: Benchmark Serving Latency
-        timeout-minutes: 10
-        run: |
-          cd test/srt
-          python3 -m unittest test_serving_latency.TestServingLatency.test_default
-
-      - name: Benchmark Serving Throughput (w/o RadixAttention)
-        timeout-minutes: 10
-        run: |
-          cd test/srt
-          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
-
-      - name: Benchmark Serving Throughput (w/o ChunkedPrefill)
-        timeout-minutes: 10
-        run: |
-          cd test/srt
-          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
-
-  two-gpu:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: 2-gpu-runner
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - name: Install dependencies
-        run: |
-          pip install --upgrade pip
-          pip install -e "python[all]"
-          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
-
-      - name: Benchmark Serving Throughput (TP=2)
-        timeout-minutes: 10
-        run: |
-          cd test/srt
-          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
-
-      - name: Benchmark Serving Latency (TP=2)
-        timeout-minutes: 10
-        run: |
-          cd test/srt
-          python3 -m unittest test_moe_serving_latency.TestServingLatency.test_default
-
-      - name: Benchmark Serving Throughput (w/o RadixAttention) (TP=2)
-        timeout-minutes: 10
-        run: |
-          cd test/srt
-          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
-
-  finish:
-    needs: [one-gpu, two-gpu]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Finish
-        run: echo "This is an empty step to ensure that all jobs are completed."
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -1,19 +1,22 @@
 name: Lint

-on: [push, pull_request]
+on: [pull_request]  # lint runs on pull requests only; the push trigger is dropped

 jobs:
  lint:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
-      - name: Set up Python 3.8
+
+      - name: Set up Python 3.9
        uses: actions/setup-python@v2
        with:
-          python-version: 3.8
+          python-version: '3.9'  # quoted so it stays a string; an unquoted 3.10 would load as the float 3.1
+
      - name: Install pre-commit hook
        run: |
          python -m pip install pre-commit
          pre-commit install
+
      - name: Linting
        run: pre-commit run --all-files
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -0,0 +1,201 @@
+name: Pull Request Test
+
+on:  # triggers: pushes and pull requests targeting main that touch the listed paths, plus manual dispatch
+  push:
+    branches: [ main ]
+    paths:
+      - "python/sglang/**"
+      - "test/**"
+  pull_request:
+    branches: [ main ]
+    paths:
+      - "python/sglang/**"
+      - "test/**"
+  workflow_dispatch:  # manual trigger; no inputs are declared
+
+concurrency:  # one concurrency group per git ref
+  group: pr-test-${{ github.ref }}
+  cancel-in-progress: true  # a newer run in the same group cancels the one still in progress
+
+jobs:
+  unit-test-frontend:  # runs the "minimal" suite from test/lang via run_suite.py
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'  # upstream repo on any trigger; elsewhere only for pull_request events
+    runs-on: 1-gpu-runner  # custom runner label; presumably a self-hosted single-GPU machine — confirm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[dev]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+
+      - name: Run test
+        timeout-minutes: 20  # the step is killed if it runs longer than 20 minutes
+        run: |
+          cd test/lang
+          python3 run_suite.py --suite minimal
+
+  unit-test-backend-part-0:  # first slice of the test/srt "minimal" suite (--range-begin 0 --range-end 8)
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'  # upstream repo on any trigger; elsewhere only for pull_request events
+    runs-on: 1-gpu-runner  # custom runner label; presumably a self-hosted single-GPU machine — confirm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[dev]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+
+      - name: Run test
+        timeout-minutes: 20  # the step is killed if it runs longer than 20 minutes
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite minimal --range-begin 0 --range-end 8
+
+  unit-test-backend-part-1:  # second slice of the test/srt "minimal" suite (--range-begin 8, no end given)
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'  # upstream repo on any trigger; elsewhere only for pull_request events
+    runs-on: 1-gpu-runner  # custom runner label; presumably a self-hosted single-GPU machine — confirm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[dev]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+
+      - name: Run test
+        timeout-minutes: 20  # the step is killed if it runs longer than 20 minutes
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite minimal --range-begin 8
+
+  performance-test-1-gpu:  # serving benchmarks, each step running one unittest case from test/srt
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'  # upstream repo on any trigger; elsewhere only for pull_request events
+    runs-on: 1-gpu-runner  # custom runner label; presumably a self-hosted single-GPU machine — confirm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[all]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+
+      - name: Benchmark Serving Throughput
+        timeout-minutes: 10  # each benchmark step has its own 10-minute cap
+        run: |
+          cd test/srt
+          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
+
+      - name: Benchmark Serving Latency
+        timeout-minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_serving_latency.TestServingLatency.test_default
+
+      - name: Benchmark Serving Throughput (w/o RadixAttention)
+        timeout-minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
+
+      - name: Benchmark Serving Throughput (w/o ChunkedPrefill)
+        timeout-minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
+
+  performance-test-2-gpu:  # serving benchmarks from the test_moe_* modules; step names mark them as TP=2
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'  # upstream repo on any trigger; elsewhere only for pull_request events
+    runs-on: 2-gpu-runner  # custom runner label; presumably a self-hosted two-GPU machine — confirm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[all]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+
+      - name: Benchmark Serving Throughput (TP=2)
+        timeout-minutes: 10  # each benchmark step has its own 10-minute cap
+        run: |
+          cd test/srt
+          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
+
+      - name: Benchmark Serving Latency (TP=2)
+        timeout-minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_moe_serving_latency.TestServingLatency.test_default
+
+      - name: Benchmark Serving Throughput (w/o RadixAttention) (TP=2)
+        timeout-minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
+
+  accuracy-test-1-gpu:  # installs the human-eval clone, then runs test_eval_accuracy_large.py from test/srt
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'  # upstream repo on any trigger; elsewhere only for pull_request events
+    runs-on: 1-gpu-runner  # custom runner label; presumably a self-hosted single-GPU machine — confirm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[all]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+
+          git clone https://github.com/merrymercy/human-eval.git
+          cd human-eval
+          pip install -e .
+
+      - name: Evaluate Accuracy
+        timeout-minutes: 20  # the step is killed if it runs longer than 20 minutes
+        run: |
+          cd test/srt
+          python3 test_eval_accuracy_large.py
+
+  accuracy-test-2-gpu:  # installs the human-eval clone, then runs test_moe_eval_accuracy_large.py from test/srt
+    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'  # upstream repo on any trigger; elsewhere only for pull_request events
+    runs-on: 2-gpu-runner  # custom runner label; presumably a self-hosted two-GPU machine — confirm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install -e "python[all]"
+          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
+
+          git clone https://github.com/merrymercy/human-eval.git
+          cd human-eval
+          pip install -e .
+
+      - name: Evaluate Accuracy
+        timeout-minutes: 20  # the step is killed if it runs longer than 20 minutes
+        run: |
+          cd test/srt
+          python3 test_moe_eval_accuracy_large.py
+
+  finish:  # aggregation job: waits on all seven test jobs listed in needs; its only step echoes a message
+    needs: [
+      unit-test-frontend, unit-test-backend-part-0, unit-test-backend-part-1,
+      performance-test-1-gpu, performance-test-2-gpu,
+      accuracy-test-1-gpu, accuracy-test-2-gpu
+    ]
+    runs-on: ubuntu-latest  # GitHub-hosted runner label, unlike the *-gpu-runner labels used by the test jobs
+    steps:
+      - name: Finish
+        run: echo "This is an empty step to ensure that all jobs are completed."
--- a/.github/workflows/unit-test.yml
+++ b/.github/workflows/unit-test.yml
@@ -1,56 +0,0 @@
-name: Unit Test
-
-on:
-  push:
-    branches: [ main ]
-    paths:
-      - "python/sglang/**"
-      - "test/**"
-  pull_request:
-    branches: [ main ]
-    paths:
-      - "python/sglang/**"
-      - "test/**"
-  workflow_dispatch:
-
-concurrency:
-  group: unit-test-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  run-test:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: 1-gpu-runner
-    strategy:
-      matrix:
-        test_type: ['backend-0', 'backend-1', 'frontend']
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - name: Install dependencies
-        run: |
-          pip install --upgrade pip
-          pip install -e "python[dev]"
-          pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
-
-      - name: Run test
-        timeout-minutes: 20
-        run: |
-          if [ "${{ matrix.test_type }}" = "frontend" ]; then
-            cd test/lang
-            python3 run_suite.py --suite minimal
-          elif [ "${{ matrix.test_type }}" = "backend-0" ]; then
-            cd test/srt
-            python3 run_suite.py --suite minimal --range-begin 0 --range-end 8
-          elif [ "${{ matrix.test_type }}" = "backend-1" ]; then
-            cd test/srt
-            python3 run_suite.py --suite minimal --range-begin 8
-          fi
-
-  finish:
-    needs: [run-test]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Finish
-        run: echo "This is an empty step to ensure that all jobs are completed."
--- a/python/sglang/README.md
+++ b/python/sglang/README.md
@@ -2,8 +2,8 @@

 - `lang`: The frontend language.
 - `srt`: The backend engine for running local models. (SRT = SGLang Runtime).
- `test`: Test utilities.
- `api.py`: Public API.
+- `test`: The test utilities.
+- `api.py`: The public APIs.
 - `bench_latency.py`: Benchmark a single static batch.
 - `bench_serving.py`: Benchmark online serving with dynamic requests.
 - `global_config.py`: The global configs and constants.
--- a/test/srt/test_moe_serving_throughput.py
+++ b/test/srt/test_moe_serving_throughput.py
@@ -75,7 +75,7 @@ class TestServingThroughput(unittest.TestCase):
        )

        if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
-            assert res["output_throughput"] > 1850
+            assert res["output_throughput"] > 1800

    def test_default_without_radix_cache(self):
        res = self.run_test(