From 8e3f8bab57cff0a98dc75ad43d8bf5bb4113f34e Mon Sep 17 00:00:00 2001 From: Li Wang Date: Wed, 25 Mar 2026 09:24:01 +0800 Subject: [PATCH] [Nightly] Nightly pre-build image (#7388) ### What this PR does / why we need it? This pull request refactors the nightly image build and simplifies the logic shared across multiple workflows. 1. The nightly image build becomes a prerequisite when the tests are triggered by `schedule` or `workflow_dispatch`. 2. Simplify the pull request test-case selection logic. 3. Next step: Implement replaceable nightly tests. Specifically, if nightly tests are manually triggered, they can accept an optional Docker image to meet the needs of different commits (which means the image is customizable). ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.17.0 - vLLM main: https://github.com/vllm-project/vllm/commit/4034c3d32e30d01639459edd3ab486f56993876d --------- Signed-off-by: wangli --- .../workflows/_e2e_nightly_multi_node.yaml | 11 +- .../workflows/_e2e_nightly_single_node.yaml | 21 +- .github/workflows/_nightly_image_build.yaml | 71 +++++++ .github/workflows/_parse_trigger.yaml | 115 ++++++++++ .github/workflows/nightly_image_build.yaml | 44 ++++ .../schedule_nightly_image_build.yaml | 66 ------ .../workflows/schedule_nightly_test_a2.yaml | 168 ++++----------- .../workflows/schedule_nightly_test_a3.yaml | 199 +++--------------- 8 files changed, 315 insertions(+), 380 deletions(-) create mode 100644 .github/workflows/_nightly_image_build.yaml create mode 100644 .github/workflows/_parse_trigger.yaml create mode 100644 .github/workflows/nightly_image_build.yaml delete mode 100644 .github/workflows/schedule_nightly_image_build.yaml diff --git a/.github/workflows/_e2e_nightly_multi_node.yaml b/.github/workflows/_e2e_nightly_multi_node.yaml index ff9513f6..3bace55f 100644 --- a/.github/workflows/_e2e_nightly_multi_node.yaml +++ b/.github/workflows/_e2e_nightly_multi_node.yaml @@ -45,12 +45,9 @@ on: default: main type: string 
description: used for pr level tests - is_pr_test: + should_run: required: true type: boolean - is_run: - required: true - type: boolean secrets: KUBECONFIG_B64: required: true @@ -74,7 +71,7 @@ jobs: name: ${{ inputs.config_file_path }} # This is the runner with no NPU for k8s controller runs-on: ${{ inputs.runner }} - if: ${{ inputs.is_run }} + if: ${{ inputs.should_run }} container: image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-cpu env: @@ -84,7 +81,7 @@ jobs: - name: Decode kubeconfig from secrets run: | # Decode and save kubeconfig - if [ "${{ inputs.is_pr_test }}" = "true" ]; then + if [ "${{ github.event_name }}" = "pull_request" ]; then echo "PR test mode" if [ "${{ inputs.soc_version }}" = "a3" ]; then echo "Using A3 cached kubeconfig" @@ -161,7 +158,7 @@ jobs: image="${{ inputs.image }}" config_file_path="${{ inputs.config_file_path }}" fail_tag=FAIL_TAG_"${{ inputs.config_file_path }}" - is_pr_test="${{ inputs.is_pr_test }}" + is_pr_test="${{ github.event_name == 'pull_request' }}" vllm_version="${{ inputs.vllm_version }}" vllm_ascend_ref="${{ inputs.vllm_ascend_ref }}" vllm_ascend_remote_url="${{ inputs.vllm_ascend_remote_url }}" diff --git a/.github/workflows/_e2e_nightly_single_node.yaml b/.github/workflows/_e2e_nightly_single_node.yaml index dac3a9c0..a445999e 100644 --- a/.github/workflows/_e2e_nightly_single_node.yaml +++ b/.github/workflows/_e2e_nightly_single_node.yaml @@ -40,10 +40,7 @@ on: required: false type: string default: "v0.18.0" - is_pr_test: - required: true - type: boolean - is_run: + should_run: required: true type: boolean @@ -64,7 +61,7 @@ jobs: e2e-nightly: name: ${{ inputs.name || inputs.config_file_path || inputs.tests }} runs-on: ${{ inputs.runner }} - if: ${{ inputs.is_run }} + if: ${{ inputs.should_run }} timeout-minutes: 600 container: image: ${{ inputs.image }} @@ -85,14 +82,14 @@ jobs: pip install uv - name: uninstall vlm vllm-ascend and remove code (if pr test) - if: ${{ 
inputs.is_pr_test }} + if: ${{ github.event_name == 'pull_request' }} run: | pip uninstall -y vllm vllm-ascend || true cp -r /vllm-workspace/vllm-ascend/benchmark /tmp/aisbench-backup || true rm -rf /vllm-workspace/vllm /vllm-workspace/vllm-ascend - name: Checkout vllm-project/vllm repo - if: ${{ inputs.is_pr_test }} + if: ${{ github.event_name == 'pull_request' }} uses: actions/checkout@v6 with: repository: vllm-project/vllm @@ -101,27 +98,27 @@ jobs: fetch-depth: 1 - name: Checkout vllm-project/vllm-ascend repo - if: ${{ inputs.is_pr_test }} + if: ${{ github.event_name == 'pull_request' }} uses: actions/checkout@v6 with: path: ./temp-vllm-ascend fetch-depth: 1 - name: Move code to /vllm-workspace - if: ${{ inputs.is_pr_test }} + if: ${{ github.event_name == 'pull_request' }} run: | mv ./temp-vllm /vllm-workspace/vllm mv ./temp-vllm-ascend /vllm-workspace/vllm-ascend ls -R /vllm-workspace - name: Install vllm-project/vllm from source - if: ${{ inputs.is_pr_test }} + if: ${{ github.event_name == 'pull_request' }} working-directory: /vllm-workspace/vllm run: | VLLM_TARGET_DEVICE=empty uv pip install -e . - name: Install vllm-project/vllm-ascend - if: ${{ inputs.is_pr_test }} + if: ${{ github.event_name == 'pull_request' }} working-directory: /vllm-workspace/vllm-ascend env: PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi @@ -132,7 +129,7 @@ jobs: uv pip install -v -e . - name: Install aisbench - if: ${{ inputs.is_pr_test }} + if: ${{ github.event_name == 'pull_request' }} shell: bash -l {0} run: | cp -r /tmp/aisbench-backup /vllm-workspace/vllm-ascend/benchmark diff --git a/.github/workflows/_nightly_image_build.yaml b/.github/workflows/_nightly_image_build.yaml new file mode 100644 index 00000000..60b76e6c --- /dev/null +++ b/.github/workflows/_nightly_image_build.yaml @@ -0,0 +1,71 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# + +name: 'Nightly image build' + +on: + workflow_call: + inputs: + target: + required: true + type: string + description: "Build target: 'a2' or 'a3'" + secrets: + HW_USERNAME: + required: false + HW_TOKEN: + required: false + GITEE_TOKEN: + required: false + +jobs: + build: + name: Build nightly-${{ inputs.target }} image + runs-on: ubuntu-22.04-arm + steps: + - uses: actions/checkout@v6 + + - name: Login to Huawei Cloud SWR + id: login-swr + if: ${{ env.HW_USERNAME != '' && env.HW_TOKEN != '' }} + env: + HW_USERNAME: ${{ secrets.HW_USERNAME }} + HW_TOKEN: ${{ secrets.HW_TOKEN }} + run: | + echo "$HW_TOKEN" | docker login -u "$HW_USERNAME" --password-stdin swr.cn-southwest-2.myhuaweicloud.com + + - name: Build nightly-${{ inputs.target }} image + env: + GITEE_USERNAME: ${{ vars.GITEE_USERNAME }} + GITEE_TOKEN: ${{ secrets.GITEE_TOKEN }} + run: | + IMAGE="swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-${{ inputs.target }}" + docker build \ + --network host \ + --platform linux/arm64 \ + -f .github/workflows/dockerfiles/Dockerfile.nightly.${{ inputs.target }} \ + --build-arg CANN_VERSION="8.5.1" \ + --build-arg UBUNTU_VERSION="22.04" \ + --build-arg PYTHON_VERSION="3.11" \ + --build-arg GITEE_USERNAME="${GITEE_USERNAME}" \ + --build-arg GITEE_TOKEN="${GITEE_TOKEN}" \ + -t "$IMAGE" . 
+ + - name: Push image to SWR + if: ${{ github.repository_owner == 'vllm-project' && steps.login-swr.conclusion == 'success' }} + run: | + docker push swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-${{ inputs.target }} diff --git a/.github/workflows/_parse_trigger.yaml b/.github/workflows/_parse_trigger.yaml new file mode 100644 index 00000000..e7baeb8d --- /dev/null +++ b/.github/workflows/_parse_trigger.yaml @@ -0,0 +1,115 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# + +name: 'Parse nightly trigger' + +on: + workflow_call: + inputs: + runner: + required: false + type: string + default: linux-aarch64-a2b3-0 + outputs: + run: + description: "Whether nightly tests should run" + value: ${{ jobs.parse.outputs.run }} + filter: + description: "Comma-wrapped test name filter (e.g. 
',name1,name2,'), or 'all'" + value: ${{ jobs.parse.outputs.filter }} + ref: + description: "The vllm-ascend ref (commit SHA for PRs, branch/tag name otherwise)" + value: ${{ jobs.parse.outputs.ref }} + +jobs: + parse: + name: Parse trigger and determine test scope + runs-on: ${{ inputs.runner }} + outputs: + run: ${{ steps.parse.outputs.run }} + filter: ${{ steps.parse.outputs.filter }} + ref: ${{ steps.parse.outputs.ref }} + steps: + - name: Parse trigger + id: parse + uses: actions/github-script@v7 + with: + script: | + const eventName = context.eventName; + + function parseNightlyComment(body) { // Returns null (no /nightly command), 'all', or ",name1,name2," + if (!body) return null; + const match = body.trim().match(/^\/nightly(?:[ \t]+(.+))?$/m); // [ \t]+ rather than \s+: \s also matches newlines, which let text on the following line be captured as test names + if (!match) return null; + const args = (match[1] || '').trim(); + if (!args || args === 'all') return 'all'; + // Wrap with commas for exact-name matching: ",name1,name2," + return ',' + args.split(/\s+/).join(',') + ','; + } + + function getRef() { // PR head SHA for pull_request; otherwise context.ref with the refs/heads/ or refs/tags/ prefix stripped, defaulting to 'main' + if (eventName === 'pull_request') { + return context.payload.pull_request.head.sha; + } + return (context.ref || '').replace(/^refs\/(heads|tags)\//, '') || 'main'; + } + + core.setOutput('ref', getRef()); + + // 1. schedule / workflow_dispatch: run all tests with pre-built image + if (eventName === 'schedule' || eventName === 'workflow_dispatch') { + core.setOutput('run', 'true'); + core.setOutput('filter', 'all'); + return; + } + + // 2. 
pull_request (labeled / synchronize) + if (eventName === 'pull_request') { + const labels = context.payload.pull_request.labels.map(l => l.name); + if (!labels.includes('nightly-test')) { + core.setOutput('run', 'false'); + core.setOutput('filter', ''); + return; + } + // Search comments for latest /nightly command + const prNumber = context.payload.pull_request.number; + const comments = await github.paginate(github.rest.issues.listComments, { + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + per_page: 100, + }); + let filter = null; + for (let i = comments.length - 1; i >= 0; i--) { + const result = parseNightlyComment(comments[i].body); + if (result !== null) { filter = result; break; } + } + // No /nightly comment found: do not run any tests + if (filter === null) { + core.info('nightly-test label present but no /nightly comment found; skipping.'); + core.setOutput('run', 'false'); + core.setOutput('filter', ''); + return; + } + core.setOutput('run', 'true'); + core.setOutput('filter', filter); + return; + } + + // Fallback + core.setOutput('run', 'false'); + core.setOutput('filter', ''); diff --git a/.github/workflows/nightly_image_build.yaml b/.github/workflows/nightly_image_build.yaml new file mode 100644 index 00000000..f5c6f240 --- /dev/null +++ b/.github/workflows/nightly_image_build.yaml @@ -0,0 +1,44 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# + +# This workflow builds the nightly-a2 and nightly-a3 images on demand; its only trigger is workflow_dispatch (no schedule). +# The nightly test workflows (Nightly-A2, Nightly-A3) each rebuild the image fresh +# before running tests, so run this workflow only to rebuild the images outside of a test run. +name: Nightly Image Build Schedule + +on: + workflow_dispatch: + # Next step: Add more inputs here if needed, e.g. vllm version, vllm-ascend version, image tag, etc. + +jobs: + build-a2: + uses: ./.github/workflows/_nightly_image_build.yaml + with: + target: a2 + secrets: + HW_USERNAME: ${{ secrets.HW_USERNAME }} + HW_TOKEN: ${{ secrets.HW_TOKEN }} + GITEE_TOKEN: ${{ secrets.GITEE_TOKEN }} + + build-a3: + uses: ./.github/workflows/_nightly_image_build.yaml + with: + target: a3 + secrets: + HW_USERNAME: ${{ secrets.HW_USERNAME }} + HW_TOKEN: ${{ secrets.HW_TOKEN }} + GITEE_TOKEN: ${{ secrets.GITEE_TOKEN }} diff --git a/.github/workflows/schedule_nightly_image_build.yaml b/.github/workflows/schedule_nightly_image_build.yaml deleted file mode 100644 index cee5c283..00000000 --- a/.github/workflows/schedule_nightly_image_build.yaml +++ /dev/null @@ -1,66 +0,0 @@ -name: Nightly Image Build Schedule - -on: - schedule: - # UTC+8: 20pm, 23pm - - cron: '0 12,15 * * *' - workflow_dispatch: - -# This workflow builds and pushes Docker images for nightly-ci -# It will be built base on the quay.io/ascend/vllm-ascend:main -# And have some customizations for nightly testing, pushing to Huawei Cloud SWR -jobs: - build-and-sync: - runs-on: ubuntu-22.04-arm - - strategy: - matrix: - target: ['a2', 'a3'] - - outputs: - image-tag: ${{ steps.build-image.outputs.image-tag }} - - steps: - - uses: actions/checkout@v6 - - - name: Show build target - run: | - echo "Building target: ${{ matrix.target }}" - - - name: Login to Huawei Cloud SWR - id: login-swr - if: ${{ 
env.HW_USERNAME != '' && env.HW_TOKEN != '' }} - run: | - echo "${{ env.HW_TOKEN }}" | docker login -u "${{ env.HW_USERNAME }}" --password-stdin swr.cn-southwest-2.myhuaweicloud.com - env: - HW_USERNAME: ${{ secrets.HW_USERNAME }} - HW_TOKEN: ${{ secrets.HW_TOKEN }} - - - name: Build image - id: build-image - env: - GITEE_USERNAME: ${{ vars.GITEE_USERNAME }} - GITEE_TOKEN: ${{ secrets.GITEE_TOKEN }} - run: | - TARGET="${{ matrix.target }}" - IMAGE_TAG="swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-${TARGET}" - - echo "Building image: $IMAGE_TAG" - docker build \ - --network host \ - --platform linux/arm64 \ - -f .github/workflows/dockerfiles/Dockerfile.nightly.${TARGET} \ - --build-arg CANN_VERSION="8.5.1" \ - --build-arg UBUNTU_VERSION="22.04" \ - --build-arg PYTHON_VERSION="3.11" \ - --build-arg GITEE_USERNAME="${GITEE_USERNAME}" \ - --build-arg GITEE_TOKEN="${GITEE_TOKEN}" \ - -t "$IMAGE_TAG" . - - echo "image-tag=$IMAGE_TAG" >> $GITHUB_OUTPUT - - # To avoid pushing images from forks, only push when the repository owner is 'vllm-project' - - name: Push image to SWR - if: ${{ github.repository_owner == 'vllm-project' && steps.login-swr.conclusion == 'success' }} - run: | - docker push ${{ steps.build-image.outputs.image-tag }} diff --git a/.github/workflows/schedule_nightly_test_a2.yaml b/.github/workflows/schedule_nightly_test_a2.yaml index 11026061..f6ea8a85 100644 --- a/.github/workflows/schedule_nightly_test_a2.yaml +++ b/.github/workflows/schedule_nightly_test_a2.yaml @@ -49,125 +49,42 @@ concurrency: jobs: parse-trigger: name: Parse trigger and determine test scope - runs-on: linux-aarch64-a2b3-0 if: >- github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'nightly-test') - outputs: - should_run: ${{ steps.parse.outputs.should_run }} - test_filter: ${{ steps.parse.outputs.test_filter }} - is_pr_event: ${{ steps.parse.outputs.is_pr_event }} - steps: - - 
name: Parse trigger - id: parse - uses: actions/github-script@v7 - with: - script: | - const eventName = context.eventName; + uses: ./.github/workflows/_parse_trigger.yaml - function parseNightlyComment(body) { - if (!body) return null; - const match = body.trim().match(/^\/nightly(?:\s+(.+))?$/m); - if (!match) return null; - const args = (match[1] || '').trim(); - if (!args || args === 'all') return 'all'; - // Wrap with commas for exact-name matching: ",name1,name2," - return ',' + args.split(/\s+/).join(',') + ','; - } - - // schedule / workflow_dispatch: run all tests with pre-built image - if (eventName === 'schedule' || eventName === 'workflow_dispatch') { - core.setOutput('should_run', 'true'); - core.setOutput('test_filter', 'all'); - core.setOutput('is_pr_event', 'false'); - return; - } - - // pull_request (labeled / synchronize) - if (eventName === 'pull_request') { - const labels = context.payload.pull_request.labels.map(l => l.name); - if (!labels.includes('nightly-test')) { - core.setOutput('should_run', 'false'); - core.setOutput('test_filter', ''); - core.setOutput('is_pr_event', 'true'); - return; - } - // Search comments for latest /nightly command - const prNumber = context.payload.pull_request.number; - const comments = await github.paginate(github.rest.issues.listComments, { - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: prNumber, - per_page: 100, - }); - let testFilter = null; - for (let i = comments.length - 1; i >= 0; i--) { - const result = parseNightlyComment(comments[i].body); - if (result !== null) { testFilter = result; break; } - } - // No /nightly comment found: do not run any tests - if (testFilter === null) { - core.info('nightly-test label present but no /nightly comment found; skipping.'); - core.setOutput('should_run', 'false'); - core.setOutput('test_filter', ''); - core.setOutput('is_pr_event', 'true'); - return; - } - core.setOutput('should_run', 'true'); - core.setOutput('test_filter', testFilter); - 
core.setOutput('is_pr_event', 'true'); - return; - } - - // Fallback - core.setOutput('should_run', 'false'); - core.setOutput('test_filter', ''); - core.setOutput('is_pr_event', 'false'); + build-image: + name: Build nightly-a2 image + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + uses: ./.github/workflows/_nightly_image_build.yaml + with: + target: a2 + secrets: + HW_USERNAME: ${{ secrets.HW_USERNAME }} + HW_TOKEN: ${{ secrets.HW_TOKEN }} + GITEE_TOKEN: ${{ secrets.GITEE_TOKEN }} single-node-tests: name: single-node - needs: [parse-trigger] - if: always() && needs.parse-trigger.outputs.should_run == 'true' + needs: [parse-trigger, build-image] + if: >- + always() && + needs.parse-trigger.outputs.run == 'true' && + (needs.build-image.result == 'success' || needs.build-image.result == 'skipped') strategy: fail-fast: false matrix: test_config: + # pytest-driven tests - name: test_custom_op os: linux-aarch64-a2b3-1 tests: tests/e2e/nightly/single_node/ops/singlecard_ops - name: test_custom_op_multi_card os: linux-aarch64-a2b3-4 tests: tests/e2e/nightly/single_node/ops/multicard_ops_a2/ - uses: ./.github/workflows/_e2e_nightly_single_node.yaml - with: - runner: ${{ matrix.test_config.os }} - tests: ${{ matrix.test_config.tests }} - name: ${{ matrix.test_config.name }} - image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2' - is_pr_test: >- - ${{ - needs.parse-trigger.outputs.is_pr_event == 'true' && ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name)) - ) - }} - is_run: >- - ${{ - needs.parse-trigger.outputs.should_run == 'true' && ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name)) - ) - }} - - single-node-yaml-tests: - name: single-node - needs: [parse-trigger] - if: always() && 
needs.parse-trigger.outputs.should_run == 'true' - strategy: - fail-fast: false - matrix: - test_config: + # YAML-driven tests - name: qwen3-32b os: linux-aarch64-a2b3-4 config_file_path: Qwen3-32B.yaml @@ -181,28 +98,24 @@ jobs: with: runner: ${{ matrix.test_config.os }} image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2' + tests: ${{ matrix.test_config.tests }} config_file_path: ${{ matrix.test_config.config_file_path }} name: ${{ matrix.test_config.name }} - is_pr_test: >- + should_run: >- ${{ - needs.parse-trigger.outputs.is_pr_event == 'true' && ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name)) + needs.parse-trigger.outputs.run == 'true' && ( + needs.parse-trigger.outputs.filter == 'all' || + contains(needs.parse-trigger.outputs.filter, format(',{0},', matrix.test_config.name)) ) }} - is_run: >- - ${{ - needs.parse-trigger.outputs.should_run == 'true' && ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name)) - ) - }} - multi-node-tests: name: multi-node - needs: [parse-trigger, single-node-tests, single-node-yaml-tests] - if: always() && needs.parse-trigger.outputs.should_run == 'true' + needs: [parse-trigger, build-image, single-node-tests] + if: >- + always() && + needs.parse-trigger.outputs.run == 'true' && + (needs.build-image.result == 'success' || needs.build-image.result == 'skipped') strategy: fail-fast: false max-parallel: 2 @@ -222,19 +135,12 @@ jobs: replicas: 1 size: ${{ matrix.test_config.size }} config_file_path: ${{ matrix.test_config.config_file_path }} - vllm_ascend_ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.ref_name }} - is_pr_test: >- + vllm_ascend_ref: ${{ needs.parse-trigger.outputs.ref }} + should_run: >- ${{ - needs.parse-trigger.outputs.is_pr_event == 'true' 
&& ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name)) - ) - }} - is_run: >- - ${{ - needs.parse-trigger.outputs.should_run == 'true' && ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name)) + needs.parse-trigger.outputs.run == 'true' && ( + needs.parse-trigger.outputs.filter == 'all' || + contains(needs.parse-trigger.outputs.filter, format(',{0},', matrix.test_config.name)) ) }} secrets: @@ -242,7 +148,7 @@ jobs: single-node-accuracy-tests: needs: [parse-trigger] - if: always() && needs.parse-trigger.outputs.should_run == 'true' + if: always() && needs.parse-trigger.outputs.run == 'true' strategy: fail-fast: false matrix: @@ -283,9 +189,9 @@ jobs: image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.1-910b-ubuntu22.04-py3.11' is_run: >- ${{ - needs.parse-trigger.outputs.should_run == 'true' && ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name)) + needs.parse-trigger.outputs.run == 'true' && ( + needs.parse-trigger.outputs.filter == 'all' || + contains(needs.parse-trigger.outputs.filter, format(',{0},', matrix.test_config.name)) ) }} upload: false diff --git a/.github/workflows/schedule_nightly_test_a3.yaml b/.github/workflows/schedule_nightly_test_a3.yaml index e2ada87f..8f323258 100644 --- a/.github/workflows/schedule_nightly_test_a3.yaml +++ b/.github/workflows/schedule_nightly_test_a3.yaml @@ -49,86 +49,30 @@ concurrency: jobs: parse-trigger: name: Parse trigger and determine test scope - runs-on: linux-aarch64-a2b3-0 if: >- github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'nightly-test') - outputs: - should_run: ${{ steps.parse.outputs.should_run }} - test_filter: ${{ 
steps.parse.outputs.test_filter }} - is_pr_event: ${{ steps.parse.outputs.is_pr_event }} - steps: - - name: Parse trigger - id: parse - uses: actions/github-script@v7 - with: - script: | - const eventName = context.eventName; + uses: ./.github/workflows/_parse_trigger.yaml - function parseNightlyComment(body) { - if (!body) return null; - const match = body.trim().match(/^\/nightly(?:\s+(.+))?$/m); - if (!match) return null; - const args = (match[1] || '').trim(); - if (!args || args === 'all') return 'all'; - // Wrap with commas for exact-name matching: ",name1,name2," - return ',' + args.split(/\s+/).join(',') + ','; - } - - // schedule / workflow_dispatch: run all tests with pre-built image - if (eventName === 'schedule' || eventName === 'workflow_dispatch') { - core.setOutput('should_run', 'true'); - core.setOutput('test_filter', 'all'); - core.setOutput('is_pr_event', 'false'); - return; - } - - // pull_request (labeled / synchronize) - if (eventName === 'pull_request') { - const labels = context.payload.pull_request.labels.map(l => l.name); - if (!labels.includes('nightly-test')) { - core.setOutput('should_run', 'false'); - core.setOutput('test_filter', ''); - core.setOutput('is_pr_event', 'true'); - return; - } - // Search comments for latest /nightly command - const prNumber = context.payload.pull_request.number; - const comments = await github.paginate(github.rest.issues.listComments, { - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: prNumber, - per_page: 100, - }); - let testFilter = null; - for (let i = comments.length - 1; i >= 0; i--) { - const result = parseNightlyComment(comments[i].body); - if (result !== null) { testFilter = result; break; } - } - // No /nightly comment found: do not run any tests - if (testFilter === null) { - core.info('nightly-test label present but no /nightly comment found; skipping.'); - core.setOutput('should_run', 'false'); - core.setOutput('test_filter', ''); - core.setOutput('is_pr_event', 'true'); 
- return; - } - core.setOutput('should_run', 'true'); - core.setOutput('test_filter', testFilter); - core.setOutput('is_pr_event', 'true'); - return; - } - - // Fallback - core.setOutput('should_run', 'false'); - core.setOutput('test_filter', ''); - core.setOutput('is_pr_event', 'false'); + build-image: + name: Build nightly-a3 image + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + uses: ./.github/workflows/_nightly_image_build.yaml + with: + target: a3 + secrets: + HW_USERNAME: ${{ secrets.HW_USERNAME }} + HW_TOKEN: ${{ secrets.HW_TOKEN }} + GITEE_TOKEN: ${{ secrets.GITEE_TOKEN }} multi-node-tests: name: multi-node - needs: [parse-trigger] - if: always() && needs.parse-trigger.outputs.should_run == 'true' + needs: [parse-trigger, build-image] + if: >- + always() && + needs.parse-trigger.outputs.run == 'true' && + (needs.build-image.result == 'success' || needs.build-image.result == 'skipped') strategy: fail-fast: false max-parallel: 2 @@ -140,15 +84,9 @@ jobs: - name: multi-node-qwen3-dp config_file_path: Qwen3-235B-A22B.yaml size: 2 - # - name: multi-node-dpsk-4node-pd - # config_file_path: DeepSeek-R1-W8A8.yaml - # size: 4 - name: multi-node-qwenw8a8-2node config_file_path: Qwen3-235B-W8A8.yaml size: 2 - # - name: multi-node-deepseek-r1-w8a8-eplb - # config_file_path: DeepSeek-R1-W8A8-EPLB.yaml - # size: 4 - name: multi-node-qwenw8a8-2node-eplb config_file_path: Qwen3-235B-W8A8-EPLB.yaml size: 2 @@ -190,19 +128,12 @@ jobs: replicas: 1 size: ${{ matrix.test_config.size }} config_file_path: ${{ matrix.test_config.config_file_path }} - vllm_ascend_ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.ref_name }} - is_pr_test: >- + vllm_ascend_ref: ${{ needs.parse-trigger.outputs.ref }} + should_run: >- ${{ - needs.parse-trigger.outputs.is_pr_event == 'true' && ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', 
matrix.test_config.name)) - ) - }} - is_run: >- - ${{ - needs.parse-trigger.outputs.should_run == 'true' && ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name)) + needs.parse-trigger.outputs.run == 'true' && ( + needs.parse-trigger.outputs.filter == 'all' || + contains(needs.parse-trigger.outputs.filter, format(',{0},', matrix.test_config.name)) ) }} secrets: @@ -210,44 +141,22 @@ jobs: single-node-tests: name: single-node - needs: [parse-trigger, multi-node-tests] - if: always() && needs.parse-trigger.outputs.should_run == 'true' + needs: [parse-trigger, build-image, multi-node-tests] + if: >- + always() && + needs.parse-trigger.outputs.run == 'true' && + (needs.build-image.result == 'success' || needs.build-image.result == 'skipped') strategy: fail-fast: false matrix: test_config: + # pytest-driven tests - name: qwen3-30b-acc os: linux-aarch64-a3-4 tests: tests/e2e/weekly/single_node/models/test_qwen3_30b_acc.py - uses: ./.github/workflows/_e2e_nightly_single_node.yaml - with: - runner: ${{ matrix.test_config.os }} - image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3' - tests: ${{ matrix.test_config.tests }} - name: ${{ matrix.test_config.name }} - is_pr_test: >- - ${{ - needs.parse-trigger.outputs.is_pr_event == 'true' && ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name)) - ) - }} - is_run: >- - ${{ - needs.parse-trigger.outputs.should_run == 'true' && ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name)) - ) - }} - - single-node-yaml-tests: - name: single-node - if: always() && needs.parse-trigger.outputs.should_run == 'true' - needs: [parse-trigger, multi-node-tests] - strategy: - fail-fast: false - matrix: - test_config: + - name: 
custom-multi-ops + os: linux-aarch64-a3-16 + tests: tests/e2e/nightly/single_node/ops/multicard_ops_a3/ # YAML-driven tests - name: deepseek-r1-0528-w8a8 os: linux-aarch64-a3-16 @@ -316,51 +225,13 @@ jobs: with: runner: ${{ matrix.test_config.os }} image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3' + tests: ${{ matrix.test_config.tests }} config_file_path: ${{ matrix.test_config.config_file_path }} name: ${{ matrix.test_config.name }} - is_pr_test: >- + should_run: >- ${{ - needs.parse-trigger.outputs.is_pr_event == 'true' && ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name)) - ) - }} - is_run: >- - ${{ - needs.parse-trigger.outputs.should_run == 'true' && ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name)) - ) - }} - - custom-ops-tests: - name: test ops - needs: [parse-trigger, multi-node-tests] - if: always() && needs.parse-trigger.outputs.should_run == 'true' - strategy: - fail-fast: false - matrix: - test_config: - - name: custom-multi-ops - os: linux-aarch64-a3-16 - tests: tests/e2e/nightly/single_node/ops/multicard_ops_a3/ - uses: ./.github/workflows/_e2e_nightly_single_node.yaml - with: - runner: ${{ matrix.test_config.os }} - image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3' - tests: ${{ matrix.test_config.tests }} - name: ${{ matrix.test_config.name }} - is_pr_test: >- - ${{ - needs.parse-trigger.outputs.is_pr_event == 'true' && ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name)) - ) - }} - is_run: >- - ${{ - needs.parse-trigger.outputs.should_run == 'true' && ( - needs.parse-trigger.outputs.test_filter == 'all' || - contains(needs.parse-trigger.outputs.test_filter, format(',{0},', 
matrix.test_config.name)) + needs.parse-trigger.outputs.run == 'true' && ( + needs.parse-trigger.outputs.filter == 'all' || + contains(needs.parse-trigger.outputs.filter, format(',{0},', matrix.test_config.name)) ) }}