### What this PR does / why we need it? This PR enables the accuracy test for PRs labeled with "*accuracy-test" and for workflow_dispatch. Only one model test runs for each type of test to reduce execution time. - The dense test costs about `25mins` to complete (gsm8k 7mins, ~mmlu 3h24mins,~ cEval 18mins) - The vl test costs about `40mins` to complete In the future, we might consider enabling all job tests as a nightly scheduled job. The main changes are: - the dense/vl accuracy test will be triggered by labeling `accuracy-test` and `ready-for-test` - the dense accuracy test will be triggered by labeling `dense-accuracy-test` and `ready-for-test` - the vl accuracy test will be triggered by labeling `vl-accuracy-test` and `ready-for-test` - accuracy test will also be triggered by workflow_dispatch - Support V1 and V0 for qwen and V0 for VL For PR tests we also generate a summary in the test summary. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - CI passed with accuracy-test label - Preview: https://github.com/vllm-project/vllm-ascend/actions/runs/15407628722?pr=1040 Closes: https://github.com/vllm-project/vllm-ascend/pull/953 --------- Signed-off-by: hfadzxy <starmoon_zhang@163.com> Signed-off-by: Yikun Jiang <yikunkero@gmail.com> Co-authored-by: hfadzxy <starmoon_zhang@163.com>
129 lines
5.9 KiB
YAML
129 lines
5.9 KiB
YAML
#
|
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# This file is a part of the vllm-ascend project.
|
|
#
|
|
|
|
name: Accuracy Report

# This workflow is started manually only; both inputs are required.
on:
  workflow_dispatch:
    inputs:
      # Used as the checkout ref and as the base branch of the generated PR.
      branch:
        description: 'choose a dev branch to pr'
        required: true
        type: string
      # Prefix of the "<version>-<model>-V0-report" artifact names that the
      # job looks up and downloads.
      vllm-ascend-version:
        description: 'what vllm-ascend version to accuracy test?'
        required: true
        type: string
|
|
jobs:
  # Finds the most recent accuracy-report artifact for each model, downloads
  # the markdown reports into the docs tree, and opens a pull request against
  # the selected branch with the updated files.
  download:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.branch }}

      # Dumps the first page of the repository's artifacts to the job log.
      - name: Debug List Artifacts
        run: gh api /repos/${{ github.repository }}/actions/artifacts
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # The three "Query" steps below share the same logic:
      #   * user input reaches the script only through environment variables,
      #     never through ${{ }} interpolation inside the shell text;
      #   * the artifact list is filtered by name on the server and requested
      #     with the maximum page size, so the wanted artifact is not lost
      #     behind the default first page;
      #   * expired artifacts are skipped because they cannot be downloaded;
      #   * the step fails right away when nothing matches instead of
      #     exporting the string "null" as run id.
      - name: Query artifact run id for Qwen2.5-VL-7B-Instruct V0 latest artifact
        id: get_Qwen2_5_VL_7B_Instruct_latest_run_id_V0
        run: |
          set -euo pipefail
          RUN_ID=$(gh api --method GET "repos/${REPO}/actions/artifacts" \
            -f name="${ARTIFACT_NAME}" -f per_page=100 |
            jq -r --arg name "${ARTIFACT_NAME}" \
              '[.artifacts[] | select(.name == $name and (.expired | not))]
               | sort_by(.created_at) | last | .workflow_run.id // empty')
          if [ -z "${RUN_ID}" ]; then
            echo "::error::No unexpired artifact named '${ARTIFACT_NAME}' found in ${REPO}"
            exit 1
          fi
          echo "runid=${RUN_ID}" >> "${GITHUB_OUTPUT}"
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          ARTIFACT_NAME: ${{ github.event.inputs.vllm-ascend-version }}-Qwen2.5-VL-7B-Instruct-V0-report

      - name: Query artifact run id for Qwen2.5-7B-Instruct V0 latest artifact
        id: get_Qwen2_5_7B_Instruct_latest_run_id_V0
        run: |
          set -euo pipefail
          RUN_ID=$(gh api --method GET "repos/${REPO}/actions/artifacts" \
            -f name="${ARTIFACT_NAME}" -f per_page=100 |
            jq -r --arg name "${ARTIFACT_NAME}" \
              '[.artifacts[] | select(.name == $name and (.expired | not))]
               | sort_by(.created_at) | last | .workflow_run.id // empty')
          if [ -z "${RUN_ID}" ]; then
            echo "::error::No unexpired artifact named '${ARTIFACT_NAME}' found in ${REPO}"
            exit 1
          fi
          echo "runid=${RUN_ID}" >> "${GITHUB_OUTPUT}"
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          ARTIFACT_NAME: ${{ github.event.inputs.vllm-ascend-version }}-Qwen2.5-7B-Instruct-V0-report

      - name: Query artifact run id for Qwen3-8B-Base V0 latest artifact
        id: get_Qwen3_8B_Base_latest_run_id_V0
        run: |
          set -euo pipefail
          RUN_ID=$(gh api --method GET "repos/${REPO}/actions/artifacts" \
            -f name="${ARTIFACT_NAME}" -f per_page=100 |
            jq -r --arg name "${ARTIFACT_NAME}" \
              '[.artifacts[] | select(.name == $name and (.expired | not))]
               | sort_by(.created_at) | last | .workflow_run.id // empty')
          if [ -z "${RUN_ID}" ]; then
            echo "::error::No unexpired artifact named '${ARTIFACT_NAME}' found in ${REPO}"
            exit 1
          fi
          echo "runid=${RUN_ID}" >> "${GITHUB_OUTPUT}"
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          ARTIFACT_NAME: ${{ github.event.inputs.vllm-ascend-version }}-Qwen3-8B-Base-V0-report

      # The downloads read from the same repository the run ids were looked
      # up in, so the workflow also behaves consistently when run from a fork.
      - name: Download Qwen/Qwen2.5-VL-7B-Instruct V0 Artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ github.event.inputs.vllm-ascend-version }}-Qwen2.5-VL-7B-Instruct-V0-report
          path: ./docs/source/developer_guide/evaluation/accuracy_report
          github-token: ${{ secrets.GITHUB_TOKEN }}
          repository: ${{ github.repository }}
          run-id: ${{ steps.get_Qwen2_5_VL_7B_Instruct_latest_run_id_V0.outputs.runid }}

      - name: Download Qwen/Qwen2.5-7B-Instruct Artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ github.event.inputs.vllm-ascend-version }}-Qwen2.5-7B-Instruct-V0-report
          path: ./docs/source/developer_guide/evaluation/accuracy_report
          github-token: ${{ secrets.GITHUB_TOKEN }}
          repository: ${{ github.repository }}
          run-id: ${{ steps.get_Qwen2_5_7B_Instruct_latest_run_id_V0.outputs.runid }}

      - name: Download Qwen/Qwen3-8B-Base Artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ github.event.inputs.vllm-ascend-version }}-Qwen3-8B-Base-V0-report
          path: ./docs/source/developer_guide/evaluation/accuracy_report
          github-token: ${{ secrets.GITHUB_TOKEN }}
          repository: ${{ github.repository }}
          run-id: ${{ steps.get_Qwen3_8B_Base_latest_run_id_V0.outputs.runid }}

      # Prints the downloaded reports to the log; fails if one is missing.
      - name: Display Files
        working-directory: ./docs/source/developer_guide/evaluation/accuracy_report
        run: |
          cat ./Qwen2.5-VL-7B-Instruct.md
          cat ./Qwen2.5-7B-Instruct.md
          cat ./Qwen3-8B-Base.md

      # Uses the dedicated PR_TOKEN secret rather than GITHUB_TOKEN.
      - name: Create Pull Request for markdown update
        uses: peter-evans/create-pull-request@v7
        with:
          token: ${{ secrets.PR_TOKEN }}
          base: ${{ github.event.inputs.branch }}
          branch: auto-pr/accuracy-test
          commit-message: "Update accuracy report for ${{ github.event.inputs.branch }}"
          add-paths: ./docs/source/developer_guide/evaluation/accuracy_report/*.md
          title: "[Doc]Update accuracy report for ${{ github.event.inputs.branch }}"
          body: |
            The accuracy results running on Ascend NPU have changed, I'm updating the report.
            Please review the changes.

            - [Workflow run][1]
            - [Qwen2.5-7B-Instruct accuracy report][2]
            - [Qwen2.5-VL-7B-Instruct accuracy report][3]
            - [Qwen3-8B-Base accuracy report][4]

            [1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
            [2]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_7B_Instruct_latest_run_id_V0.outputs.runid }}
            [3]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_VL_7B_Instruct_latest_run_id_V0.outputs.runid }}
            [4]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen3_8B_Base_latest_run_id_V0.outputs.runid }}