From 761b2cebd65ff7fbf2cd55b63e1230df1bf6f6ca Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sun, 1 Sep 2024 02:36:56 -0700 Subject: [PATCH] [CI] merge all ci tests into one file (#1289) --- .github/workflows/accuracy-test.yml | 74 --------- .github/workflows/e2e-test.yml | 96 ----------- .github/workflows/lint.yml | 11 +- .github/workflows/pr-test.yml | 201 ++++++++++++++++++++++++ .github/workflows/unit-test.yml | 56 ------- python/sglang/README.md | 4 +- test/srt/test_moe_serving_throughput.py | 2 +- 7 files changed, 211 insertions(+), 233 deletions(-) delete mode 100644 .github/workflows/accuracy-test.yml delete mode 100644 .github/workflows/e2e-test.yml create mode 100644 .github/workflows/pr-test.yml delete mode 100644 .github/workflows/unit-test.yml diff --git a/.github/workflows/accuracy-test.yml b/.github/workflows/accuracy-test.yml deleted file mode 100644 index b7118e217..000000000 --- a/.github/workflows/accuracy-test.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: Accuracy Test - -on: - push: - branches: [ main ] - paths: - - "python/sglang/**" - - "test/**" - pull_request: - branches: [ main ] - paths: - - "python/sglang/**" - - "test/**" - workflow_dispatch: - -concurrency: - group: accuracy-test-${{ github.ref }} - cancel-in-progress: true - -jobs: - one-gpu: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: 1-gpu-runner - - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - git clone https://github.com/merrymercy/human-eval.git - cd human-eval - pip install -e . - - - name: Evaluate Accuracy - timeout-minutes: 20 - run: | - cd test/srt - python3 test_eval_accuracy_large.py - - two-gpu: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: 2-gpu-runner - - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - git clone https://github.com/merrymercy/human-eval.git - cd human-eval - pip install -e . - - - name: Evaluate Accuracy - timeout-minutes: 20 - run: | - cd test/srt - python3 test_moe_eval_accuracy_large.py - - finish: - needs: [one-gpu, two-gpu] - runs-on: ubuntu-latest - steps: - - name: Finish - run: echo "This is an empty step to ensure that all jobs are completed." diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml deleted file mode 100644 index c5594ac4a..000000000 --- a/.github/workflows/e2e-test.yml +++ /dev/null @@ -1,96 +0,0 @@ -name: E2E Test - -on: - push: - branches: [ main ] - paths: - - "python/sglang/**" - - "test/**" - pull_request: - branches: [ main ] - paths: - - "python/sglang/**" - - "test/**" - workflow_dispatch: - -concurrency: - group: e2e-test-${{ github.ref }} - cancel-in-progress: true - -jobs: - one-gpu: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: 1-gpu-runner - - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - - name: Benchmark Serving Throughput - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_serving_throughput.TestServingThroughput.test_default - - - name: Benchmark Serving Latency - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_serving_latency.TestServingLatency.test_default - - - name: Benchmark Serving Throughput (w/o RadixAttention) - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache - - - name: Benchmark Serving Throughput (w/o ChunkedPrefill) - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill - - two-gpu: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: 2-gpu-runner - - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[all]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - - name: Benchmark Serving Throughput (TP=2) - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default - - - name: Benchmark Serving Latency (TP=2) - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_moe_serving_latency.TestServingLatency.test_default - - - name: Benchmark Serving Throughput (w/o RadixAttention) (TP=2) - timeout-minutes: 10 - run: | - cd test/srt - python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache - - finish: - needs: [one-gpu, two-gpu] - runs-on: ubuntu-latest - steps: - - name: Finish - run: echo "This is an empty step to ensure that all jobs are completed." diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 076140506..4857f844f 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,19 +1,22 @@ name: Lint -on: [push, pull_request] +on: [pull_request] jobs: lint: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Set up Python 3.8 + + - name: Set up Python 3.9 uses: actions/setup-python@v2 with: - python-version: 3.8 + python-version: 3.9 + - name: Install pre-commit hook run: | python -m pip install pre-commit pre-commit install + - name: Linting run: pre-commit run --all-files diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml new file mode 100644 index 000000000..f8b50ad5d --- /dev/null +++ b/.github/workflows/pr-test.yml @@ -0,0 +1,201 @@ +name: Pull Request Test + +on: + push: + branches: [ main ] + paths: + - "python/sglang/**" + - "test/**" + pull_request: + branches: [ main ] + paths: + - "python/sglang/**" + - "test/**" + workflow_dispatch: + +concurrency: + group: pr-test-${{ github.ref }} + cancel-in-progress: true + +jobs: + unit-test-frontend: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 1-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[dev]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + - name: Run test + timeout-minutes: 20 + run: | + cd test/lang + python3 run_suite.py --suite minimal + + unit-test-backend-part-0: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 1-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[dev]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + - name: Run test + timeout-minutes: 20 + run: | + cd test/srt + python3 run_suite.py --suite minimal --range-begin 0 --range-end 8 + + unit-test-backend-part-1: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 1-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[dev]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + - name: Run test + timeout-minutes: 20 + run: | + cd test/srt + python3 run_suite.py --suite minimal --range-begin 8 + + performance-test-1-gpu: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 1-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + - name: Benchmark Serving Throughput + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_serving_throughput.TestServingThroughput.test_default + + - name: Benchmark Serving Latency + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_serving_latency.TestServingLatency.test_default + + - name: Benchmark Serving Throughput (w/o RadixAttention) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache + + - name: Benchmark Serving Throughput (w/o ChunkedPrefill) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill + + performance-test-2-gpu: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 2-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + - name: Benchmark Serving Throughput (TP=2) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default + + - name: Benchmark Serving Latency (TP=2) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_latency.TestServingLatency.test_default + + - name: Benchmark Serving Throughput (w/o RadixAttention) (TP=2) + timeout-minutes: 10 + run: | + cd test/srt + python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache + + accuracy-test-1-gpu: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 1-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + git clone https://github.com/merrymercy/human-eval.git + cd human-eval + pip install -e . + + - name: Evaluate Accuracy + timeout-minutes: 20 + run: | + cd test/srt + python3 test_eval_accuracy_large.py + + accuracy-test-2-gpu: + if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + runs-on: 2-gpu-runner + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall + + git clone https://github.com/merrymercy/human-eval.git + cd human-eval + pip install -e . + + - name: Evaluate Accuracy + timeout-minutes: 20 + run: | + cd test/srt + python3 test_moe_eval_accuracy_large.py + + finish: + needs: [ + unit-test-frontend, unit-test-backend-part-0, unit-test-backend-part-1, + performance-test-1-gpu, performance-test-2-gpu, + accuracy-test-1-gpu, accuracy-test-2-gpu + ] + runs-on: ubuntu-latest + steps: + - name: Finish + run: echo "This is an empty step to ensure that all jobs are completed." diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml deleted file mode 100644 index 5d774b67e..000000000 --- a/.github/workflows/unit-test.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: Unit Test - -on: - push: - branches: [ main ] - paths: - - "python/sglang/**" - - "test/**" - pull_request: - branches: [ main ] - paths: - - "python/sglang/**" - - "test/**" - workflow_dispatch: - -concurrency: - group: unit-test-${{ github.ref }} - cancel-in-progress: true - -jobs: - run-test: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' - runs-on: 1-gpu-runner - strategy: - matrix: - test_type: ['backend-0', 'backend-1', 'frontend'] - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Install dependencies - run: | - pip install --upgrade pip - pip install -e "python[dev]" - pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - - - name: Run test - timeout-minutes: 20 - run: | - if [ "${{ matrix.test_type }}" = "frontend" ]; then - cd test/lang - python3 run_suite.py --suite minimal - elif [ "${{ matrix.test_type }}" = "backend-0" ]; then - cd test/srt - python3 run_suite.py --suite minimal --range-begin 0 --range-end 8 - elif [ "${{ matrix.test_type }}" = "backend-1" ]; then - cd test/srt - python3 run_suite.py --suite minimal --range-begin 8 - fi - - finish: - needs: [run-test] - runs-on: ubuntu-latest - steps: - - name: Finish - run: echo "This is an empty step to ensure that all jobs are completed." diff --git a/python/sglang/README.md b/python/sglang/README.md index c92144254..481c69aff 100644 --- a/python/sglang/README.md +++ b/python/sglang/README.md @@ -2,8 +2,8 @@ - `lang`: The frontend language. - `srt`: The backend engine for running local models. (SRT = SGLang Runtime). -- `test`: Test utilities. -- `api.py`: Public API. +- `test`: The test utilities. +- `api.py`: The public APIs. - `bench_latency.py`: Benchmark a single static batch. - `bench_serving.py`: Benchmark online serving with dynamic requests. - `global_config.py`: The global configs and constants. diff --git a/test/srt/test_moe_serving_throughput.py b/test/srt/test_moe_serving_throughput.py index 6f040da34..2acf626c1 100644 --- a/test/srt/test_moe_serving_throughput.py +++ b/test/srt/test_moe_serving_throughput.py @@ -75,7 +75,7 @@ class TestServingThroughput(unittest.TestCase): ) if os.getenv("SGLANG_IS_IN_CI", "false") == "true": - assert res["output_throughput"] > 1850 + assert res["output_throughput"] > 1800 def test_default_without_radix_cache(self): res = self.run_test(