[CI] Support per-PR nightly CI triggered by labels (#6483)

### What this PR does / why we need it? This PR refactors the nightly CI workflows (A2 and A3) to support running tests against a specific PR's code, in addition to the existing scheduled/dispatch runs using pre-built images. #### Motivation: Previously, nightly tests could only be triggered by schedule or workflow_dispatch, always using the pre-built nightly image. This change allows developers to trigger nightly tests against their own PR's source code, enabling early validation without waiting for a nightly build. #### Changes Trigger logic (parse-trigger job): A new parse-trigger job is introduced in both schedule_nightly_test_a2.yaml and schedule_nightly_test_a3.yaml to centralize trigger evaluation. `schedule / workflow_dispatch`: runs all tests with the pre-built image (existing behavior preserved). `pull_request (labeled + synchronize)`: runs only when the PR has the nightly-test label and a /nightly [test-names] comment exists (the latest one wins): 1. /nightly or /nightly all — runs all tests 2. /nightly test1 test2 — runs only the named tests (the list is comma-wrapped for exact matching) #### How to trigger 1. Add the nightly-test label to your PR 2. Comment /nightly (all tests) or /nightly test1 test2 (specific tests) 3. Re-triggering: add another /nightly comment and push a new commit (synchronize event) ### Does this PR introduce _any_ user-facing change? None ### How was this patch tested? - vLLM version: v0.14.1 - vLLM main: dc917cceb8 --------- Signed-off-by: hfadzxy <starmoon_zhang@163.com>
2026-03-05 16:46:37 +08:00
parent a6745b8577
commit 1e4017e3fa
8 changed files with 490 additions and 32 deletions
--- a/.github/workflows/schedule_nightly_test_a2.yaml
+++ b/.github/workflows/schedule_nightly_test_a2.yaml
@@ -24,10 +24,15 @@ on:
      # Run test at 24:00 Beijing time (UTC+8)
      - cron: "0 16 * * *"
  workflow_dispatch:
-  pull_request: 
+  pull_request:
    branches:
      - 'main'
-    types: [ labeled ]
+    types: [labeled, synchronize]
+
+permissions:
+  contents: read
+  pull-requests: read
+  issues: read

 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.
@@ -42,9 +47,84 @@ concurrency:
  cancel-in-progress: true

 jobs:
+  # Decides whether this run executes tests and which ones; downstream jobs read its outputs.
+  parse-trigger:
+    name: Parse trigger and determine test scope
+    runs-on: linux-aarch64-a2b3-0
+    outputs:
+      should_run: ${{ steps.parse.outputs.should_run }}
+      test_filter: ${{ steps.parse.outputs.test_filter }}
+      is_pr_event: ${{ steps.parse.outputs.is_pr_event }}
+    steps:
+      - name: Parse trigger
+        id: parse
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const eventName = context.eventName;
+
+            // Publish the three step outputs (string values) that the job exposes.
+            function setOutputs(shouldRun, testFilter, isPrEvent) {
+              core.setOutput('should_run', shouldRun);
+              core.setOutput('test_filter', testFilter);
+              core.setOutput('is_pr_event', isPrEvent);
+            }
+
+            // Map a comment body to 'all', ',a,b,' (comma-wrapped names), or null if it has no /nightly line.
+            function parseNightlyComment(body) {
+              if (!body) return null;
+              // [ \t]+ instead of \s+: \s matches newlines, so text after a bare "/nightly" became test names.
+              const match = body.trim().match(/^\/nightly(?:[ \t]+(.+))?$/m);
+              if (!match) return null;
+              const args = (match[1] || '').trim();
+              if (!args || args === 'all') return 'all';
+              // Wrap with commas for exact-name matching: ",name1,name2,"
+              return ',' + args.split(/\s+/).join(',') + ',';
+            }
+
+            // schedule / workflow_dispatch: run all tests with pre-built image
+            if (eventName === 'schedule' || eventName === 'workflow_dispatch') {
+              setOutputs('true', 'all', 'false');
+              return;
+            }
+
+            // pull_request (labeled / synchronize)
+            if (eventName === 'pull_request') {
+              const labels = context.payload.pull_request.labels.map(l => l.name);
+              if (!labels.includes('nightly-test')) {
+                setOutputs('false', '', 'true');
+                return;
+              }
+              // Search comments for latest /nightly command
+              const prNumber = context.payload.pull_request.number;
+              const comments = await github.paginate(github.rest.issues.listComments, {
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+                per_page: 100,
+              });
+              let testFilter = null;
+              for (let i = comments.length - 1; i >= 0; i--) {
+                const result = parseNightlyComment(comments[i].body);
+                if (result !== null) { testFilter = result; break; }
+              }
+              // No /nightly comment found: do not run any tests
+              if (testFilter === null) {
+                core.info('nightly-test label present but no /nightly comment found; skipping.');
+                setOutputs('false', '', 'true');
+                return;
+              }
+              setOutputs('true', testFilter, 'true');
+              return;
+            }
+
+            // Fallback
+            setOutputs('false', '', 'false');
+
  single-node-tests:
    name: single-node
-    if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
+    needs: [parse-trigger]
+    if: always() && needs.parse-trigger.outputs.should_run == 'true'
    strategy:
      fail-fast: false
      matrix:
@@ -61,10 +141,25 @@ jobs:
      tests: ${{ matrix.test_config.tests }}
      name: ${{ matrix.test_config.name }}
      image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2'
+      is_pr_test: >-
+        ${{
+          needs.parse-trigger.outputs.is_pr_event == 'true' && (
+            needs.parse-trigger.outputs.test_filter == 'all' ||
+            contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name))
+          )
+        }}
+      is_run: >-
+        ${{
+          needs.parse-trigger.outputs.should_run == 'true' && (
+            needs.parse-trigger.outputs.test_filter == 'all' ||
+            contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name))
+          )
+        }}

  single-node-yaml-tests:
    name: single-node
-    if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
+    needs: [parse-trigger]
+    if: always() && needs.parse-trigger.outputs.should_run == 'true'
    strategy:
      fail-fast: false
      matrix:
@@ -84,11 +179,26 @@ jobs:
      image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2'
      config_file_path: ${{ matrix.test_config.config_file_path }}
      name: ${{ matrix.test_config.name }}
+      is_pr_test: >-
+        ${{
+          needs.parse-trigger.outputs.is_pr_event == 'true' && (
+            needs.parse-trigger.outputs.test_filter == 'all' ||
+            contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name))
+          )
+        }}
+      is_run: >-
+        ${{
+          needs.parse-trigger.outputs.should_run == 'true' && (
+            needs.parse-trigger.outputs.test_filter == 'all' ||
+            contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name))
+          )
+        }}
+

  multi-node-tests:
    name: multi-node
-    if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
-    needs: [single-node-tests, single-node-yaml-tests]
+    needs: [parse-trigger, single-node-tests, single-node-yaml-tests]
+    if: always() && needs.parse-trigger.outputs.should_run == 'true'
    strategy:
      fail-fast: false
      max-parallel: 1
@@ -108,40 +218,55 @@ jobs:
      replicas: 1
      size: ${{ matrix.test_config.size }}
      config_file_path: ${{ matrix.test_config.config_file_path }}
+      vllm_ascend_ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.ref_name }}
+      is_pr_test: >-
+        ${{
+          needs.parse-trigger.outputs.is_pr_event == 'true' && (
+            needs.parse-trigger.outputs.test_filter == 'all' ||
+            contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name))
+          )
+        }}
+      is_run: >-
+        ${{
+          needs.parse-trigger.outputs.should_run == 'true' && (
+            needs.parse-trigger.outputs.test_filter == 'all' ||
+            contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name))
+          )
+        }}
    secrets:
      KUBECONFIG_B64: ${{ secrets.KUBECONFIG_HK_001_INTERNAL_B64 }}

  single-node-accuracy-tests:
-    if: >-
-      ${{
-        github.event_name == 'schedule' ||
-        github.event_name == 'workflow_dispatch' ||
-        contains(github.event.pull_request.labels.*.name, 'accuracy-test')
-      }}
+    needs: [parse-trigger]
+    if: always() && needs.parse-trigger.outputs.should_run == 'true'
    strategy:
      fail-fast: false
      matrix:
        test_config:
-          - os: linux-aarch64-a2b3-1
+          - name: accuracy-group-1
+            os: linux-aarch64-a2b3-1
            model_list:
              - Qwen3-8B
              - Qwen2-Audio-7B-Instruct
              - Qwen3-8B-W8A8
              - Qwen3-VL-8B-Instruct
              - Qwen2.5-Omni-7B
-          - os: linux-aarch64-a2b3-1
+          - name: accuracy-group-2
+            os: linux-aarch64-a2b3-1
            model_list:
              - ERNIE-4.5-21B-A3B-PT
              - InternVL3_5-8B-hf
              - Molmo-7B-D-0924
              - Llama-3.2-3B-Instruct
              - llava-onevision-qwen2-0.5b-ov-hf
-          - os: linux-aarch64-a2b3-2
+          - name: accuracy-group-3
+            os: linux-aarch64-a2b3-2
            model_list:
              - Qwen3-30B-A3B
              - Qwen3-VL-30B-A3B-Instruct
              - Qwen3-30B-A3B-W8A8
-          - os: linux-aarch64-a2b3-4
+          - name: accuracy-group-4
+            os: linux-aarch64-a2b3-4
            model_list:
              - Qwen3-Next-80B-A3B-Instruct
              - Qwen3-Omni-30B-A3B-Instruct
@@ -151,10 +276,18 @@ jobs:
      runner: ${{ matrix.test_config.os }}
      model_list: ${{ toJson(matrix.test_config.model_list) }}
      image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.1-910b-ubuntu22.04-py3.11'
+      is_run: >-
+        ${{
+          needs.parse-trigger.outputs.should_run == 'true' && (
+            needs.parse-trigger.outputs.test_filter == 'all' ||
+            contains(needs.parse-trigger.outputs.test_filter, format(',{0},', matrix.test_config.name))
+          )
+        }}
      upload: false

  doc-test:
    name: doc-test
+    needs: [parse-trigger]
    if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
    strategy:
      # Each version should be tested