diff --git a/.github/workflows/accuracy-test.yml b/.github/workflows/accuracy-test.yml index 16bb584f4..da2d98e86 100644 --- a/.github/workflows/accuracy-test.yml +++ b/.github/workflows/accuracy-test.yml @@ -6,11 +6,13 @@ on: paths: - "python/sglang/**" - "test/**" + - ".github/workflows/accuracy-test.yml" pull_request: branches: [ main ] paths: - "python/sglang/**" - "test/**" + - ".github/workflows/accuracy-test.yml" workflow_dispatch: concurrency: @@ -43,4 +45,4 @@ jobs: run: | cd test/srt python3 test_eval_accuracy_large.py - timeout-minutes: 20 + timeout-minutes: 10 diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 455594bd7..3a338a657 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -6,11 +6,13 @@ on: paths: - "python/sglang/**" - "test/**" + - ".github/workflows/e2e-test.yml" pull_request: branches: [ main ] paths: - "python/sglang/**" - "test/**" + - ".github/workflows/e2e-test.yml" workflow_dispatch: concurrency: @@ -39,13 +41,16 @@ jobs: run: | cd test/srt python3 -m unittest test_serving_throughput.TestServingThroughput.test_default + timeout-minutes: 10 - name: Benchmark Serving Throughput (w/o RadixAttention) run: | cd test/srt python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache + timeout-minutes: 10 - name: Benchmark Serving Throughput (w/ ChunkedPrefill) run: | cd test/srt python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_with_chunked_prefill + timeout-minutes: 10 diff --git a/.github/workflows/moe-test.yml b/.github/workflows/moe-test.yml index a781f2eff..39eb2a71d 100644 --- a/.github/workflows/moe-test.yml +++ b/.github/workflows/moe-test.yml @@ -6,11 +6,13 @@ on: paths: - "python/sglang/**" - "test/**" + - ".github/workflows/moe-test.yml" pull_request: branches: [ main ] paths: - "python/sglang/**" - "test/**" + - ".github/workflows/moe-test.yml" workflow_dispatch: concurrency: @@ -36,7 +38,12 @@ jobs: pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall - name: Benchmark MOE Serving Throughput - run: | - cd test/srt - python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default - python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache + uses: nick-fields/retry@v3 + with: + timeout_minutes: 15 + max_attempts: 2 + retry_on: error + command: | + cd test/srt + python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default + python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index f9b79dc67..59228585f 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -6,11 +6,13 @@ on: paths: - "python/sglang/**" - "test/**" + - ".github/workflows/unit-test.yml" pull_request: branches: [ main ] paths: - "python/sglang/**" - "test/**" + - ".github/workflows/unit-test.yml" workflow_dispatch: concurrency: @@ -41,8 +43,10 @@ jobs: run: | cd test/srt python3 run_suite.py --suite minimal + timeout-minutes: 15 - name: Test Frontend Language run: | cd test/lang python3 run_suite.py --suite minimal + timeout-minutes: 10 diff --git a/test/srt/test_moe_serving_throughput.py b/test/srt/test_moe_serving_throughput.py index 48798c5d5..713eba7ab 100644 --- a/test/srt/test_moe_serving_throughput.py +++ b/test/srt/test_moe_serving_throughput.py @@ -73,7 +73,7 @@ class TestServingThroughput(unittest.TestCase): if os.getenv("SGLANG_IS_IN_CI", "false") == "true": # A100 (PCIE) performance - assert res["output_throughput"] > 950 + assert res["output_throughput"] > 930 def test_default_without_radix_cache(self): res = self.run_test(