[CI] Reduce CI time (#2801)

1. Only run the light e2e tests before the PR is labeled `ready`, to reduce CI time. 2. Run the full tests once the PR is labeled `ready` and `ready for test`. 3. Run the lint job on a self-hosted CPU container to avoid long waits. - vLLM version: v0.10.1.1 - vLLM main: 6910b56da2 Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-09-09 10:52:14 +08:00
parent 1bbb20ea13
commit 5bcb4c1528
4 changed files with 262 additions and 56 deletions
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -25,7 +25,6 @@ on:
    branches:
      - 'main'
      - '*-dev'
-
 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.
 # It's used to activate ascend-toolkit environment variables.
@@ -44,7 +43,9 @@ jobs:
    uses: ./.github/workflows/pre-commit.yml

  changes:
-    runs-on: ubuntu-latest
+    runs-on: linux-amd64-cpu-0
+    container:
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
    outputs:
      e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
      ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
@@ -68,6 +69,7 @@ jobs:
              - 'packages.txt'
            ut_tracker:
              - 'tests/ut/**'
+
  ut:
    needs: [lint, changes]
    name: unit test
@@ -129,16 +131,16 @@ jobs:
          name: vllm-ascend
          verbose: true

-  e2e:
+  e2e-light:
    needs: [lint, changes]
    # only trigger e2e test after lint passed and the change is e2e related with pull request.
-    if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
+    if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
    strategy:
      max-parallel: 2
      matrix:
        os: [linux-aarch64-a2-1]
        vllm_version: [v0.10.1.1, main]
-    name: singlecard e2e test
+    name: singlecard e2e test - light
    runs-on: ${{ matrix.os }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -191,38 +193,19 @@ jobs:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
-          # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
-          # the test separately.
-
          pytest -sv tests/e2e/singlecard/test_aclgraph.py
-          pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
-          pytest -sv tests/e2e/singlecard/test_camem.py
-          pytest -sv tests/e2e/singlecard/test_chunked.py
-          pytest -sv tests/e2e/singlecard/test_embedding.py
-          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
-          # TODO: Fix lora accuracy error
-          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
-          pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
          pytest -sv tests/e2e/singlecard/test_quantization.py
-          pytest -sv tests/e2e/singlecard/test_sampler.py
-          pytest -sv tests/e2e/singlecard/test_vlm.py
+          pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl

-          # ------------------------------------ v1 spec decode test ------------------------------------ #
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
-
-          pytest -sv tests/e2e/singlecard/ops/
-
-  e2e-2-cards:
-    needs: [e2e]
-    if: ${{ needs.e2e.result == 'success' }}
+  e2e-2-cards-light:
+    needs: [e2e-light]
+    if: ${{ needs.e2e-light.result == 'success' }}
    strategy:
      max-parallel: 2
      matrix:
        os: [linux-aarch64-a2-2]
        vllm_version: [v0.10.1.1, main]
-    name: multicard e2e test
+    name: multicard e2e test - light
    runs-on: ${{ matrix.os }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -275,22 +258,4 @@ jobs:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
-          pytest -sv tests/e2e/multicard/test_data_parallel.py
-          pytest -sv tests/e2e/multicard/test_expert_parallel.py
-          # external_launcher test is not stable enough. Fix it later
-          # pytest -sv tests/e2e/multicard/test_external_launcher.py
-          pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
-          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
-
-          # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
-          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
-
-          #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
-          pytest -sv tests/e2e/multicard/test_prefix_caching.py
-          pytest -sv tests/e2e/multicard/test_qwen3_moe.py
-          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
+          pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP