Files
xc-llm-ascend/.github/workflows/accuracy_test.yaml
wangxiyuan 4f5964420e [CI] Upgrade vllm to 0.9.1 (#1165)
1. upgrade vllm to 0.9.1. 0.9.0 is not supported for main branch now.
keep doc to 0.9.0 until we release the first 0.9.1 release.
2. disable V0 test for PR
3. move actionlint check to lint job

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-06-11 16:33:11 +08:00

255 lines
10 KiB
YAML

#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
# This test will be triggered:
# 1. PR labeled with: '*accuracy-test' (ONLY 1 label valid) & 'ready-for-test'
# 2. workflow_dispatch with models input
# See detail rule in strategy.matrix note
name: Benchmarks / accuracy
on:
pull_request:
types: [ labeled ]
workflow_dispatch:
inputs:
vllm-version:
description: 'vllm version:'
required: true
type: choice
# Please also update this when bump matched version
# Current supported vLLM versions
options:
- main
- v0.9.1
- v0.7.3
vllm-ascend-version:
description: 'vllm-ascend version:'
required: true
type: choice
options:
- main
- v0.7.3-dev
models:
description: 'model:'
required: true
type: choice
options:
- all
- Qwen/Qwen2.5-7B-Instruct
- Qwen/Qwen2.5-VL-7B-Instruct
- Qwen/Qwen3-8B-Base
default: 'all'
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
run:
shell: bash -el {0}
# only cancel in-progress runs of the same workflow
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
accuracy_tests:
# test will be triggered when tag '*-accuracy-test' & 'ready-for-test' or workflow_dispatch job
if: >-
${{
(contains(github.event.pull_request.labels.*.name, 'accuracy-test') ||
contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
github.event_name == 'workflow_dispatch'
}}
runs-on: >-
${{
(matrix.model_name == 'Qwen/Qwen2.5-VL-7B-Instruct' && 'linux-arm64-npu-4') ||
'linux-arm64-npu-2'
}}
strategy:
matrix:
vllm_use_version: [0, 1]
# the accuracy test will run:
# 1. workflow_dispatch with models input
# - all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
# - specified but not all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
# 2. PR labeled with "*-accuracy-test"
# - accuracy-test: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct
# - dense-accuracy-test: Qwen/Qwen2.5-7B-Instruct
# - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
model_name: ${{ fromJSON(
(github.event.inputs.models == 'all' &&
'["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
(github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' &&
'["Qwen/Qwen2.5-7B-Instruct"]') ||
(github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' &&
'["Qwen/Qwen2.5-VL-7B-Instruct"]') ||
(github.event.inputs.models == 'Qwen/Qwen3-8B-Base' &&
'["Qwen/Qwen3-8B-Base"]') ||
contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
'["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct"]' ||
contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') &&
'["Qwen/Qwen2.5-7B-Instruct"]' ||
contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') &&
'["Qwen/Qwen2.5-VL-7B-Instruct"]'
) }}
# Remove exclude after https://github.com/vllm-project/vllm-ascend/issues/1044 resolved
exclude:
- model_name: Qwen/Qwen2.5-VL-7B-Instruct
vllm_use_version: 1
fail-fast: false
name: ${{ matrix.model_name }} accuracy V${{ matrix.vllm_use_version }}
container:
image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
env:
HF_ENDPOINT: https://hf-mirror.com
HF_TOKEN: ${{ secrets.HF_TOKEN }}
DATASET_SOURCE: ModelScope
VLLM_USE_MODELSCOPE: True
# 1. If version specified (work_dispatch), do specified branch accuracy test
# 2. If no version (labeled PR), do accuracy test by default ref:
# The branch, tag or SHA to checkout. When checking out the repository that
# triggered a workflow, this defaults to the reference or SHA for that event.
# Otherwise, uses the default branch.
GHA_VLLM_ASCEND_VERSION: ${{ github.event.inputs.vllm-ascend-version }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Check npu and CANN info
run: |
npu-smi info
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
- name: Config mirrors
run: |
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
apt-get update -y
apt install git -y
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
- name: Install system dependencies
run: |
apt-get -y install `cat packages.txt`
apt-get -y install gcc g++ cmake libnuma-dev
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
path: ./vllm-empty
# Please also update this when bump matched version
ref: ${{ github.event.inputs.vllm-version || 'v0.9.1' }}
- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: VLLM_TARGET_DEVICE=empty pip install -e .
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm-ascend
path: ./vllm-ascend
ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}
- name: Install vllm-project/vllm-ascend
working-directory: ./vllm-ascend
run: |
pip install -r requirements-dev.txt
pip install -e .
- name: Install lm-eval, ray, and datasets
run: |
pip install lm-eval
- name: Collect version info
run: |
for dir in /usr/local/Ascend/ascend-toolkit/*; do
dname=$(basename "$dir")
if [ "$dname" != "latest" ]; then
TOOLKIT_DIR="$dname"
break
fi
done
INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
GHA_CANN_VERSION=$(grep "version=" "$INFO_FILE" \
| head -n1 \
| cut -d'=' -f2 \
| tr -d '"')
{
echo "GHA_CANN_VERSION=$GHA_CANN_VERSION"
pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
echo "GHA_VLLM_ASCEND_VERSION=${{ github.event.inputs.vllm-ascend-version || github.ref }}"
} >> "$GITHUB_ENV"
- name: Print versions
run: |
echo "CANN: ${{ env.GHA_CANN_VERSION }}"
echo "Torch NPU: ${{ env.GHA_TORCH_NPU_VERSION }}"
echo "Torch: ${{ env.GHA_TORCH_VERSION }}"
echo "vLLM: ${{ env.GHA_VLLM_VERSION }}"
echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION }}"
- name: Run Accuracy Test for V${{ matrix.vllm_use_version }}
id: report
working-directory: ./benchmarks
env:
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
VLLM_USE_V1: ${{ matrix.vllm_use_version }}
run: |
model_base_name=$(basename ${{ matrix.model_name }})
markdown_name="${model_base_name}-V${{ matrix.vllm_use_version }}"
echo "markdown_name=$markdown_name"
echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
mkdir -p ./accuracy
python ./scripts/run_accuracy.py \
--model "${{ matrix.model_name }}" \
--output "./accuracy/${markdown_name}.md" \
--vllm_ascend_version "${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}" \
--cann_version "${{ env.GHA_CANN_VERSION }}" \
--torch_npu_version "${{ env.GHA_TORCH_NPU_VERSION }}" \
--torch_version "${{ env.GHA_TORCH_VERSION }}" \
--vllm_version "${{ env.GHA_VLLM_VERSION }}"
- name: Generate step summary
if: ${{ always() }}
run: |
cat ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md >> $GITHUB_STEP_SUMMARY
- name: Sanitize version string for artifact naming
run: |
SAFE_VLLM_ASCEND_VERSION="${GHA_VLLM_ASCEND_VERSION//\//-}"
echo "SAFE_VLLM_ASCEND_VERSION=$SAFE_VLLM_ASCEND_VERSION" >> "$GITHUB_ENV"
- name: Upload Report for V${{ matrix.vllm_use_version }}
if: ${{ github.event_name == 'workflow_dispatch' }}
uses: actions/upload-artifact@v4
with:
name: "${{ env.SAFE_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}-report"
path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
if-no-files-found: warn
retention-days: 90
overwrite: true