[CI] Reduce CI time (#2801)

1. Only run the light e2e test before the PR is `ready` to reduce CI time. 2. Run the full test once the PR is labeled `ready` and `ready-for-test`. 3. Run the lint job on a self-hosted CPU container to avoid long waits. - vLLM version: v0.10.1.1 - vLLM main: 6910b56da2 Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-09-09 10:52:14 +08:00
parent 1bbb20ea13
commit 5bcb4c1528
4 changed files with 262 additions and 56 deletions
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@@ -15,3 +15,4 @@ self-hosted-runner:
    - linux-aarch64-a3-2
    - linux-aarch64-a3-4
    - linux-aarch64-a3-8
    - linux-amd64-cpu-0
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -8,13 +8,19 @@ permissions:
 jobs:
  pre-commit:
-    runs-on: ubuntu-latest
+    runs-on: linux-amd64-cpu-0
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
    steps:
    - name: Config mirrors
      run: |
        sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
        pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
        pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
        apt-get update -y
        apt install git -y
    - name: Checkout vllm-project/vllm-ascend repo
      uses: actions/checkout@v4
    - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
      with:
        python-version: "3.11"
    - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
    - run: echo "::add-matcher::.github/workflows/matchers/mypy.json"
    - name: Checkout vllm-project/vllm repo
@@ -30,8 +36,9 @@ jobs:
    - name: Install vllm-ascend dev
      run: |
        pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
-    - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
+        git config --global --add safe.directory '*'
    - name: Run pre-commit check
      env:
-        SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086" # Exclude SC2046, SC2006, SC2086 for actionlint
+        SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086"
-      with:
+        GOPROXY: "https://goproxy.cn,direct"
-        extra_args: --all-files --hook-stage manual
+      run: pre-commit run --all-files --hook-stage manual
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -25,7 +25,6 @@ on:
    branches:
      - 'main'
      - '*-dev'
 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.
 # It's used to activate ascend-toolkit environment variables.
@@ -44,7 +43,9 @@ jobs:
    uses: ./.github/workflows/pre-commit.yml
  changes:
-    runs-on: ubuntu-latest
+    runs-on: linux-amd64-cpu-0
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
    outputs:
      e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
      ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
@@ -68,6 +69,7 @@ jobs:
              - 'packages.txt'
            ut_tracker:
              - 'tests/ut/**'
  ut:
    needs: [lint, changes]
    name: unit test
@@ -129,16 +131,16 @@ jobs:
          name: vllm-ascend
          verbose: true
-  e2e:
+  e2e-light:
    needs: [lint, changes]
    # only trigger e2e test after lint passed and the change is e2e related with pull request.
-    if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
+    if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
    strategy:
      max-parallel: 2
      matrix:
        os: [linux-aarch64-a2-1]
        vllm_version: [v0.10.1.1, main]
-    name: singlecard e2e test
+    name: singlecard e2e test - light
    runs-on: ${{ matrix.os }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -191,38 +193,19 @@ jobs:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
          # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
          # the test separately.
          pytest -sv tests/e2e/singlecard/test_aclgraph.py
          pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
          pytest -sv tests/e2e/singlecard/test_camem.py
          pytest -sv tests/e2e/singlecard/test_chunked.py
          pytest -sv tests/e2e/singlecard/test_embedding.py
          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
          # TODO: Fix lora accuracy error
          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
          pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
          pytest -sv tests/e2e/singlecard/test_quantization.py
-          pytest -sv tests/e2e/singlecard/test_sampler.py
+          pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
          pytest -sv tests/e2e/singlecard/test_vlm.py
-          # ------------------------------------ v1 spec decode test ------------------------------------ #
+  e2e-2-cards-light:
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
+    needs: [e2e-light]
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
+    if: ${{ needs.e2e-light.result == 'success' }}
          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
          pytest -sv tests/e2e/singlecard/ops/
  e2e-2-cards:
    needs: [e2e]
    if: ${{ needs.e2e.result == 'success' }}
    strategy:
      max-parallel: 2
      matrix:
        os: [linux-aarch64-a2-2]
        vllm_version: [v0.10.1.1, main]
-    name: multicard e2e test
+    name: multicard e2e test - light
    runs-on: ${{ matrix.os }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -275,22 +258,4 @@ jobs:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
-          pytest -sv tests/e2e/multicard/test_data_parallel.py
+          pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
          pytest -sv tests/e2e/multicard/test_expert_parallel.py
          # external_launcher test is not stable enough. Fix it later
          # pytest -sv tests/e2e/multicard/test_external_launcher.py
          pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
          # To avoid oom, we need to run the test in a single process.
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
          #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
          pytest -sv tests/e2e/multicard/test_prefix_caching.py
          pytest -sv tests/e2e/multicard/test_qwen3_moe.py
          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
--- a/.github/workflows/vllm_ascend_test_full.yaml
+++ b/.github/workflows/vllm_ascend_test_full.yaml
@@ -0,0 +1,233 @@
 #
 # Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # This file is a part of the vllm-ascend project.
 #
 # Workflow that runs the full single-card and multi-card e2e suites for a
 # pull request targeting `main` or a `*-dev` branch.
 name: 'test-full'
 on:
  pull_request:
    branches:
      - 'main'
      - '*-dev'
    # NOTE(review): only `labeled` events start this workflow, so commits pushed
    # to a PR after its labels were applied do not re-run it — confirm intended.
    types: [ labeled ]
 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.
 # It's used to activate ascend-toolkit environment variables.
 defaults:
  run:
    shell: bash -el {0}
 # only cancel in-progress runs of the same workflow
 # and ignore the lint / 1 card / 4 cards test type
 concurrency:
  # One group per workflow name and git ref; a newer run in the same group
  # cancels the one still in progress.
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  # Gate for the full e2e suites: reports whether this pull request touches
  # e2e-related paths. The job itself only runs when the PR carries both the
  # `ready` and `ready-for-test` labels.
  changes:
    runs-on: linux-amd64-cpu-0
    if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
    outputs:
      # Each output mirrors the result of the same-named filter defined below.
      e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
      ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
    steps:
      - uses: actions/checkout@v4
      - uses: dorny/paths-filter@v3
        id: filter
        with:
          # This workflow's own file is listed next to the light workflow so
          # that a PR editing only the full-test definition still triggers
          # the full e2e suites.
          filters: |
            e2e_tracker:
              - '.github/workflows/vllm_ascend_test.yaml'
              - '.github/workflows/vllm_ascend_test_full.yaml'
              - 'vllm_ascend/**'
              - 'csrc/**'
              - 'cmake/**'
              - 'tests/e2e/**'
              - 'CMakeLists.txt'
              - 'setup.py'
              - 'requirements.txt'
              - 'requirements-dev.txt'
              - 'requirements-lint.txt'
              - 'packages.txt'
            ut_tracker:
              - 'tests/ut/**'
  # Full single-card e2e suite, run once per (runner label, vLLM ref) matrix entry.
  e2e-full:
    # Runs only when the `changes` job reports e2e-related file changes. The
    # `changes` job is itself gated on the PR labels `ready` and
    # `ready-for-test`, not on review approval.
    needs: [changes]
    if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
    strategy:
      # At most two matrix entries execute at the same time.
      max-parallel: 2
      matrix:
        os: [linux-aarch64-a2-1]
        vllm_version: [v0.10.1.1, main]
    name: singlecard e2e test - full
    runs-on: ${{ matrix.os }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
    steps:
      # Print NPU status and the installed CANN toolkit info to the job log.
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
      # Point apt and pip at the in-cluster cache service, then install git,
      # which the checkout steps below rely on.
      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4
      # Installs every package named in packages.txt plus the build toolchain.
      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev
      # Check out upstream vLLM at the matrix-selected ref into ./vllm-empty.
      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ matrix.vllm_version }}
          path: ./vllm-empty
      # Editable install of the vLLM checkout with VLLM_TARGET_DEVICE=empty
      # (presumably so no device-specific backend is built — TODO confirm).
      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .
      # Install dev requirements and this repository in editable mode, with the
      # Huawei Cloud Ascend index as an extra pip index.
      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .
      # Each test file is run in its own pytest invocation.
      # NOTE(review): VLLM_USE_MODELSCOPE is already set in container.env above.
      - name: Run e2e test
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
          # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
          # the test separately.
          pytest -sv tests/e2e/singlecard/test_aclgraph.py
          pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
          pytest -sv tests/e2e/singlecard/test_camem.py
          pytest -sv tests/e2e/singlecard/test_chunked.py
          pytest -sv tests/e2e/singlecard/test_embedding.py
          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
          pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
          pytest -sv tests/e2e/singlecard/test_quantization.py
          pytest -sv tests/e2e/singlecard/test_sampler.py
          pytest -sv tests/e2e/singlecard/test_vlm.py
          # ------------------------------------ v1 spec decode test ------------------------------------ #
          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
          pytest -sv tests/e2e/singlecard/ops/
  # Full multi-card e2e suite, run once per (runner label, vLLM ref) matrix entry.
  e2e-2-cards-full:
    # Runs only when the `changes` job reports e2e-related file changes. The
    # `changes` job is itself gated on the PR labels `ready` and
    # `ready-for-test`, not on review approval. This job does not wait for
    # `e2e-full`; both depend only on `changes`.
    needs: [changes]
    if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
    strategy:
      # At most two matrix entries execute at the same time.
      max-parallel: 2
      matrix:
        os: [linux-aarch64-a2-2]
        vllm_version: [v0.10.1.1, main]
    name: multicard e2e test - full
    runs-on: ${{ matrix.os }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
    steps:
      # Print NPU status and the installed CANN toolkit info to the job log.
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
      # Point apt and pip at the in-cluster cache service, then install git,
      # which the checkout steps below rely on.
      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4
      # Installs every package named in packages.txt plus the build toolchain.
      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev
      # Check out upstream vLLM at the matrix-selected ref into ./vllm-empty.
      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ matrix.vllm_version }}
          path: ./vllm-empty
      # Editable install of the vLLM checkout with VLLM_TARGET_DEVICE=empty
      # (presumably so no device-specific backend is built — TODO confirm).
      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .
      # Install dev requirements and this repository in editable mode, with the
      # Huawei Cloud Ascend index as an extra pip index.
      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .
      # Each test file (or single test case) is run in its own pytest invocation.
      # NOTE(review): VLLM_USE_MODELSCOPE is already set in container.env above.
      - name: Run vllm-project/vllm-ascend test
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
          pytest -sv tests/e2e/multicard/test_data_parallel.py
          pytest -sv tests/e2e/multicard/test_expert_parallel.py
          # external_launcher test is not stable enough. Fix it later
          # pytest -sv tests/e2e/multicard/test_external_launcher.py
          pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
          # To avoid oom, we need to run the test in a single process.
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
          #pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
          pytest -sv tests/e2e/multicard/test_prefix_caching.py
          pytest -sv tests/e2e/multicard/test_qwen3_moe.py
          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py