Enable accuracy test for PR labeled with "*accuracy-test" (#1040)
### What this PR does / why we need it? This PR enables accuracy tests for PRs labeled with "*accuracy-test" and for workflow_dispatch. Only one model test runs for each test type to reduce execution time. - The dense test costs about `25mins` to complete (gsm8k 7mins, ~mmlu 3h24mins,~ cEval 18mins) - The vl test costs about `40mins` to complete In the future, we might consider enabling all job tests as a nightly scheduled job. Below are the main changes: - the dense/vl accuracy test will be triggered by labeling `accuracy-test` and `ready-for-test` - the dense accuracy test will be triggered by labeling `dense-accuracy-test` and `ready-for-test` - the vl accuracy test will be triggered by labeling `vl-accuracy-test` and `ready-for-test` - accuracy tests will also be triggered by workflow_dispatch - Support V1 and V0 for qwen and V0 for VL For PR tests we also generate a summary in the test summary. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - CI passed with accuracy-test label - Preview: https://github.com/vllm-project/vllm-ascend/actions/runs/15407628722?pr=1040 Closes: https://github.com/vllm-project/vllm-ascend/pull/953 --------- Signed-off-by: hfadzxy <starmoon_zhang@163.com> Signed-off-by: Yikun Jiang <yikunkero@gmail.com> Co-authored-by: hfadzxy <starmoon_zhang@163.com>
This commit is contained in:
32
.github/workflows/accuracy_report.yaml
vendored
32
.github/workflows/accuracy_report.yaml
vendored
@@ -60,16 +60,6 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
- name: Query artifact run id for Llama-3.1-8B-Instruct V0 latest artifact
|
|
||||||
id: get_Llama_3_1_8B_Instruct_latest_run_id_V0
|
|
||||||
run: |
|
|
||||||
ARTIFACT_JSON=$(gh api "repos/${{ github.repository }}/actions/artifacts")
|
|
||||||
RUN_ID=$(echo "$ARTIFACT_JSON" | \
|
|
||||||
jq -r '[.artifacts[] | select(.name=="${{ github.event.inputs.vllm-ascend-version }}-Llama-3.1-8B-Instruct-V0-report")] | sort_by(.created_at) | last | .workflow_run.id')
|
|
||||||
echo "runid=$RUN_ID" >> "$GITHUB_OUTPUT"
|
|
||||||
env:
|
|
||||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
|
|
||||||
- name: Query artifact run id for Qwen3-8B-Base V0 latest artifact
|
- name: Query artifact run id for Qwen3-8B-Base V0 latest artifact
|
||||||
id: get_Qwen3_8B_Base_latest_run_id_V0
|
id: get_Qwen3_8B_Base_latest_run_id_V0
|
||||||
run: |
|
run: |
|
||||||
@@ -98,15 +88,6 @@ jobs:
|
|||||||
repository: vllm-project/vllm-ascend
|
repository: vllm-project/vllm-ascend
|
||||||
run-id: ${{ steps.get_Qwen2_5_7B_Instruct_latest_run_id_V0.outputs.runid }}
|
run-id: ${{ steps.get_Qwen2_5_7B_Instruct_latest_run_id_V0.outputs.runid }}
|
||||||
|
|
||||||
- name: Download meta-llama/Llama-3.1-8B-Instruct Artifact
|
|
||||||
uses: actions/download-artifact@v4
|
|
||||||
with:
|
|
||||||
name: ${{ github.event.inputs.vllm-ascend-version }}-Llama-3.1-8B-Instruct-V0-report
|
|
||||||
path: ./docs/source/developer_guide/evaluation/accuracy_report
|
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
repository: vllm-project/vllm-ascend
|
|
||||||
run-id: ${{ steps.get_Llama_3_1_8B_Instruct_latest_run_id_V0.outputs.runid }}
|
|
||||||
|
|
||||||
- name: Download Qwen/Qwen3-8B-Base Artifact
|
- name: Download Qwen/Qwen3-8B-Base Artifact
|
||||||
uses: actions/download-artifact@v4
|
uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
@@ -120,7 +101,6 @@ jobs:
|
|||||||
working-directory: ./docs/source/developer_guide/evaluation/accuracy_report
|
working-directory: ./docs/source/developer_guide/evaluation/accuracy_report
|
||||||
run: |
|
run: |
|
||||||
cat ./Qwen2.5-VL-7B-Instruct.md
|
cat ./Qwen2.5-VL-7B-Instruct.md
|
||||||
cat ./Llama-3.1-8B-Instruct.md
|
|
||||||
cat ./Qwen2.5-7B-Instruct.md
|
cat ./Qwen2.5-7B-Instruct.md
|
||||||
cat ./Qwen3-8B-Base.md
|
cat ./Qwen3-8B-Base.md
|
||||||
|
|
||||||
@@ -128,7 +108,7 @@ jobs:
|
|||||||
uses: peter-evans/create-pull-request@v7
|
uses: peter-evans/create-pull-request@v7
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.PR_TOKEN }}
|
token: ${{ secrets.PR_TOKEN }}
|
||||||
base: ${{ github.ref_name }}
|
base: ${{ github.event.inputs.branch }}
|
||||||
branch: auto-pr/accuracy-test
|
branch: auto-pr/accuracy-test
|
||||||
commit-message: "Update accuracy report for ${{ github.event.inputs.branch }}"
|
commit-message: "Update accuracy report for ${{ github.event.inputs.branch }}"
|
||||||
add-paths: ./docs/source/developer_guide/evaluation/accuracy_report/*.md
|
add-paths: ./docs/source/developer_guide/evaluation/accuracy_report/*.md
|
||||||
@@ -139,12 +119,10 @@ jobs:
|
|||||||
|
|
||||||
- [Workflow run][1]
|
- [Workflow run][1]
|
||||||
- [Qwen2.5-7B-Instruct accuracy report][2]
|
- [Qwen2.5-7B-Instruct accuracy report][2]
|
||||||
- [Llama-3.1-8B-Instruct accuracy report][3]
|
- [Qwen2.5-VL-7B-Instruct accuracy report][3]
|
||||||
- [Qwen2.5-VL-7B-Instruct accuracy report][4]
|
- [Qwen3-8B-Base accuracy report][4]
|
||||||
- [Qwen3-8B-Base accuracy report][5]
|
|
||||||
|
|
||||||
[1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
[1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||||
[2]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_7B_Instruct_latest_run_id_V0.outputs.runid }}
|
[2]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_7B_Instruct_latest_run_id_V0.outputs.runid }}
|
||||||
[3]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Llama_3_1_8B_Instruct_latest_run_id_V0.outputs.runid }}
|
[3]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_VL_7B_Instruct_latest_run_id_V0.outputs.runid }}
|
||||||
[4]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_VL_7B_Instruct_latest_run_id_V0.outputs.runid }}
|
[4]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen3_8B_Base_latest_run_id_V0.outputs.runid }}
|
||||||
[5]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen3_8B_Base_latest_run_id_V0.outputs.runid }}
|
|
||||||
|
|||||||
184
.github/workflows/accuracy_test.yaml
vendored
184
.github/workflows/accuracy_test.yaml
vendored
@@ -15,27 +15,42 @@
|
|||||||
# This file is a part of the vllm-ascend project.
|
# This file is a part of the vllm-ascend project.
|
||||||
#
|
#
|
||||||
|
|
||||||
name: Accuracy Tests
|
# This test will be triggered:
|
||||||
|
# 1. PR labeled with: '*accuracy-test' (ONLY 1 label valid) & 'ready-for-test'
|
||||||
|
# 2. workflow_dispatch with models input
|
||||||
|
# See detail rule in strategy.matrix note
|
||||||
|
name: Benchmarks / accuracy
|
||||||
|
|
||||||
on:
|
on:
|
||||||
|
pull_request:
|
||||||
|
types: [ labeled ]
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
inputs:
|
inputs:
|
||||||
vllm-version:
|
vllm-version:
|
||||||
description: 'what vllm version to accuracy test?'
|
description: 'vllm version:'
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: choice
|
||||||
|
# Please also update this when bump matched version
|
||||||
|
# Current supported vLLM versions
|
||||||
|
options:
|
||||||
|
- main
|
||||||
|
- v0.9.0.1
|
||||||
|
- v0.9.0
|
||||||
|
- v0.7.3
|
||||||
vllm-ascend-version:
|
vllm-ascend-version:
|
||||||
description: 'what vllm-ascend version to accuracy test?'
|
description: 'vllm-ascend version:'
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: choice
|
||||||
|
options:
|
||||||
|
- main
|
||||||
|
- v0.7.3-dev
|
||||||
models:
|
models:
|
||||||
description: 'choose model(all/Qwen2.5-7B-Instruct/Llama-3.1-8B-Instruct/Qwen2.5-VL-7B-Instruct/Qwen3-8B-Base)'
|
description: 'model:'
|
||||||
required: true
|
required: true
|
||||||
type: choice
|
type: choice
|
||||||
options:
|
options:
|
||||||
- all
|
- all
|
||||||
- Qwen/Qwen2.5-7B-Instruct
|
- Qwen/Qwen2.5-7B-Instruct
|
||||||
- meta-llama/Llama-3.1-8B-Instruct
|
|
||||||
- Qwen/Qwen2.5-VL-7B-Instruct
|
- Qwen/Qwen2.5-VL-7B-Instruct
|
||||||
- Qwen/Qwen3-8B-Base
|
- Qwen/Qwen3-8B-Base
|
||||||
default: 'all'
|
default: 'all'
|
||||||
@@ -47,27 +62,73 @@ defaults:
|
|||||||
run:
|
run:
|
||||||
shell: bash -el {0}
|
shell: bash -el {0}
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: pr-${{ github.event.pull_request.number }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
model_tests:
|
accuracy_tests:
|
||||||
name: Model Test - ${{ matrix.model_name }}
|
# test will be triggered when tag '*-accuracy-test' & 'ready-for-test' or workflow_dispatch job
|
||||||
runs-on: 'linux-arm64-npu-2'
|
if: >-
|
||||||
|
${{
|
||||||
|
(contains(github.event.pull_request.labels.*.name, 'accuracy-test') ||
|
||||||
|
contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
|
||||||
|
contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
|
||||||
|
contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
|
||||||
|
github.event_name == 'workflow_dispatch'
|
||||||
|
}}
|
||||||
|
runs-on: >-
|
||||||
|
${{
|
||||||
|
(matrix.model_name == 'Qwen/Qwen2.5-VL-7B-Instruct' && 'linux-arm64-npu-4') ||
|
||||||
|
'linux-arm64-npu-2'
|
||||||
|
}}
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
include: ${{ fromJSON(
|
vllm_use_version: [0, 1]
|
||||||
(github.event.inputs.models == 'all' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct"},{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B-Instruct"},{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct"}, {"model_name":"Qwen/Qwen3-8B-Base","output_file":"Qwen3-8B-Base"}]') ||
|
# the accuracy test will run:
|
||||||
(github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct"}]') ||
|
# 1. workflow_dispatch with models input
|
||||||
(github.event.inputs.models == 'meta-llama/Llama-3.1-8B-Instruct' && '[{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B-Instruct"}]') ||
|
# - all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
|
||||||
(github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct"}]') ||
|
# - specified but not all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
|
||||||
(github.event.inputs.models == 'Qwen/Qwen3-8B-Base' && '[{"model_name":"Qwen/Qwen3-8B-Base","output_file":"Qwen3-8B-Base"}]')
|
# 2. PR labeled with "*-accuracy-test"
|
||||||
|
# - accuracy-test: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct
|
||||||
|
# - dense-accuracy-test: Qwen/Qwen2.5-7B-Instruct
|
||||||
|
# - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
|
||||||
|
model_name: ${{ fromJSON(
|
||||||
|
(github.event.inputs.models == 'all' &&
|
||||||
|
'["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct","model_name":"Qwen/Qwen3-8B-Base"]') ||
|
||||||
|
(github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' &&
|
||||||
|
'["Qwen/Qwen2.5-7B-Instruct"]') ||
|
||||||
|
(github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' &&
|
||||||
|
'["Qwen/Qwen2.5-VL-7B-Instruct"]') ||
|
||||||
|
(github.event.inputs.models == 'Qwen/Qwen3-8B-Base' &&
|
||||||
|
'["Qwen/Qwen3-8B-Base"]') ||
|
||||||
|
contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
|
||||||
|
'["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct"]' ||
|
||||||
|
contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') &&
|
||||||
|
'["Qwen/Qwen2.5-7B-Instruct"]' ||
|
||||||
|
contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') &&
|
||||||
|
'["Qwen/Qwen2.5-VL-7B-Instruct"]'
|
||||||
) }}
|
) }}
|
||||||
fail-fast: false
|
# Remove exclude after https://github.com/vllm-project/vllm-ascend/issues/1044 resolved
|
||||||
|
exclude:
|
||||||
|
- model_name: Qwen/Qwen2.5-VL-7B-Instruct
|
||||||
|
vllm_use_version: 1
|
||||||
|
|
||||||
|
fail-fast: false
|
||||||
|
name: ${{ matrix.model_name }} accuracy V${{ matrix.vllm_use_version }}
|
||||||
container:
|
container:
|
||||||
image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
|
image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
|
||||||
env:
|
env:
|
||||||
HF_ENDPOINT: https://hf-mirror.com
|
HF_ENDPOINT: https://hf-mirror.com
|
||||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||||
DATASET_SOURCE: ModelScope
|
DATASET_SOURCE: ModelScope
|
||||||
|
VLLM_USE_MODELSCOPE: True
|
||||||
|
# 1. If version specified (work_dispatch), do specified branch accuracy test
|
||||||
|
# 2. If no version (labeled PR), do accuracy test by default ref:
|
||||||
|
# The branch, tag or SHA to checkout. When checking out the repository that
|
||||||
|
# triggered a workflow, this defaults to the reference or SHA for that event.
|
||||||
|
# Otherwise, uses the default branch.
|
||||||
|
GHA_VLLM_ASCEND_VERSION: ${{ github.event.inputs.vllm-ascend-version }}
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
@@ -96,53 +157,30 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
repository: vllm-project/vllm
|
repository: vllm-project/vllm
|
||||||
path: ./vllm-empty
|
path: ./vllm-empty
|
||||||
ref: ${{ github.event.inputs.vllm-version }}
|
# Please also update this when bump matched version
|
||||||
|
ref: ${{ github.event.inputs.vllm-version || 'v0.9.0' }}
|
||||||
|
|
||||||
- name: Install vllm-project/vllm from source
|
- name: Install vllm-project/vllm from source
|
||||||
working-directory: ./vllm-empty
|
working-directory: ./vllm-empty
|
||||||
run: VLLM_TARGET_DEVICE=empty pip install -e .
|
run: VLLM_TARGET_DEVICE=empty pip install -e .
|
||||||
|
|
||||||
|
|
||||||
- name: Checkout vllm-project/vllm-ascend repo
|
- name: Checkout vllm-project/vllm-ascend repo
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
repository: vllm-project/vllm-ascend
|
repository: vllm-project/vllm-ascend
|
||||||
path: ./vllm-ascend
|
path: ./vllm-ascend
|
||||||
ref: ${{ github.event.inputs.vllm-ascend-version }}
|
ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: Install pta
|
|
||||||
run: |
|
|
||||||
if [ ! -d /root/.cache/pta ]; then
|
|
||||||
mkdir -p /root/.cache/pta
|
|
||||||
fi
|
|
||||||
if [ ! -f /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl ]; then
|
|
||||||
cd /root/.cache/pta
|
|
||||||
rm -rf pytorch_v2.5.1_py310*
|
|
||||||
wget https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py310.tar.gz
|
|
||||||
tar -zxvf pytorch_v2.5.1_py310.tar.gz
|
|
||||||
fi
|
|
||||||
pip install /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
|
|
||||||
|
|
||||||
- name: Install vllm-project/vllm-ascend
|
- name: Install vllm-project/vllm-ascend
|
||||||
working-directory: ./vllm-ascend
|
working-directory: ./vllm-ascend
|
||||||
run: |
|
run: |
|
||||||
pip install -r requirements-dev.txt
|
pip install -r requirements-dev.txt
|
||||||
pip install -e .
|
pip install -e .
|
||||||
|
|
||||||
- name: Checkout EleutherAI/lm-evaluation-harness repo
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
repository: EleutherAI/lm-evaluation-harness
|
|
||||||
path: ./lm-eval
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: Install EleutherAI/lm-evaluation-harness
|
- name: Install lm-eval, ray, and datasets
|
||||||
working-directory: ./lm-eval
|
|
||||||
run: |
|
run: |
|
||||||
pip install -e .
|
pip install lm-eval
|
||||||
pip install ray datasets==2.16.0
|
|
||||||
|
|
||||||
- name: Collect version info
|
- name: Collect version info
|
||||||
run: |
|
run: |
|
||||||
for dir in /usr/local/Ascend/ascend-toolkit/*; do
|
for dir in /usr/local/Ascend/ascend-toolkit/*; do
|
||||||
@@ -153,45 +191,57 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
|
INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
|
||||||
CANN_VERSION=$(grep "version=" "$INFO_FILE" \
|
GHA_CANN_VERSION=$(grep "version=" "$INFO_FILE" \
|
||||||
| head -n1 \
|
| head -n1 \
|
||||||
| cut -d'=' -f2 \
|
| cut -d'=' -f2 \
|
||||||
| tr -d '"')
|
| tr -d '"')
|
||||||
{
|
{
|
||||||
echo "CANN_VERSION=$CANN_VERSION"
|
echo "GHA_CANN_VERSION=$GHA_CANN_VERSION"
|
||||||
pip show torch | grep "Version:" | awk '{print "TORCH_VERSION="$2}'
|
pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
|
||||||
pip show torch_npu | grep "Version:" | awk '{print "TORCH_NPU_VERSION="$2}'
|
pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
|
||||||
pip show vllm | grep "Version:" | awk '{print "VLLM_VERSION="$2}' | sed 's/+.*//'
|
pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
|
||||||
} >> "$GITHUB_ENV"
|
} >> "$GITHUB_ENV"
|
||||||
|
|
||||||
- name: Print versions
|
- name: Print versions
|
||||||
run: |
|
run: |
|
||||||
echo "CANN: ${{ env.CANN_VERSION }}"
|
echo "CANN: ${{ env.GHA_CANN_VERSION }}"
|
||||||
echo "Torch NPU: ${{ env.TORCH_NPU_VERSION }}"
|
echo "Torch NPU: ${{ env.GHA_TORCH_NPU_VERSION }}"
|
||||||
echo "Torch: ${{ env.TORCH_VERSION }}"
|
echo "Torch: ${{ env.GHA_TORCH_VERSION }}"
|
||||||
echo "vLLM: ${{ env.VLLM_VERSION }}"
|
echo "vLLM: ${{ env.GHA_VLLM_VERSION }}"
|
||||||
|
echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}"
|
||||||
|
|
||||||
- name: Run Accuracy Test for V0
|
- name: Run Accuracy Test for V${{ matrix.vllm_use_version }}
|
||||||
|
id: report
|
||||||
working-directory: ./benchmarks
|
working-directory: ./benchmarks
|
||||||
env:
|
env:
|
||||||
VLLM_USE_V1: 0
|
|
||||||
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
||||||
|
VLLM_USE_V1: ${{ matrix.vllm_use_version }}
|
||||||
run: |
|
run: |
|
||||||
mkdir -p ./accuracy/V0
|
model_base_name=$(basename ${{ matrix.model_name }})
|
||||||
|
markdown_name="${model_base_name}-V${{ matrix.vllm_use_version }}"
|
||||||
|
echo "markdown_name=$markdown_name"
|
||||||
|
echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
|
||||||
|
mkdir -p ./accuracy
|
||||||
|
|
||||||
python ./scripts/run_accuracy.py \
|
python ./scripts/run_accuracy.py \
|
||||||
--model "${{ matrix.model_name }}" \
|
--model "${{ matrix.model_name }}" \
|
||||||
--output "./accuracy/V0/${{ matrix.output_file }}.md" \
|
--output "./accuracy/${markdown_name}.md" \
|
||||||
--vllm_ascend_version "${{ github.event.inputs.vllm-ascend-version }}" \
|
--vllm_ascend_version "${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}" \
|
||||||
--cann_version "${{ env.CANN_VERSION }}" \
|
--cann_version "${{ env.GHA_CANN_VERSION }}" \
|
||||||
--torch_npu_version "${{ env.TORCH_NPU_VERSION }}" \
|
--torch_npu_version "${{ env.GHA_TORCH_NPU_VERSION }}" \
|
||||||
--torch_version "${{ env.TORCH_VERSION }}" \
|
--torch_version "${{ env.GHA_TORCH_VERSION }}" \
|
||||||
--vllm_version "${{ env.VLLM_VERSION }}"
|
--vllm_version "${{ env.GHA_VLLM_VERSION }}"
|
||||||
|
|
||||||
- name: Upload Report for V0
|
- name: Generate step summary
|
||||||
|
if: ${{ always() }}
|
||||||
|
run: |
|
||||||
|
cat ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md >> $GITHUB_STEP_SUMMARY
|
||||||
|
|
||||||
|
- name: Upload Report for V${{ matrix.vllm_use_version }}
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: "${{ github.event.inputs.vllm-ascend-version }}-${{ matrix.output_file }}-V0-report"
|
name: "${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}-report"
|
||||||
path: ./benchmarks/accuracy/V0/${{ matrix.output_file }}.md
|
path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
|
||||||
if-no-files-found: warn
|
if-no-files-found: warn
|
||||||
retention-days: 90
|
retention-days: 90
|
||||||
overwrite: true
|
overwrite: true
|
||||||
|
|||||||
@@ -26,11 +26,8 @@ from multiprocessing import Queue
|
|||||||
import lm_eval
|
import lm_eval
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
UNIMODAL_MODEL_NAME = [
|
UNIMODAL_MODEL_NAME = ["Qwen/Qwen2.5-7B-Instruct", "Qwen/Qwen3-8B"]
|
||||||
"Qwen/Qwen2.5-7B-Instruct", "meta-llama/Llama-3.1-8B-Instruct",
|
UNIMODAL_TASK = ["ceval-valid", "gsm8k"]
|
||||||
"Qwen/Qwen3-8B"
|
|
||||||
]
|
|
||||||
UNIMODAL_TASK = ["ceval-valid", "mmlu", "gsm8k"]
|
|
||||||
MULTIMODAL_NAME = ["Qwen/Qwen2.5-VL-7B-Instruct"]
|
MULTIMODAL_NAME = ["Qwen/Qwen2.5-VL-7B-Instruct"]
|
||||||
MULTIMODAL_TASK = ["mmmu_val"]
|
MULTIMODAL_TASK = ["mmmu_val"]
|
||||||
|
|
||||||
@@ -38,22 +35,17 @@ batch_size_dict = {"ceval-valid": 1, "mmlu": 1, "gsm8k": "auto", "mmmu_val": 1}
|
|||||||
|
|
||||||
MODEL_RUN_INFO = {
|
MODEL_RUN_INFO = {
|
||||||
"Qwen/Qwen2.5-7B-Instruct":
|
"Qwen/Qwen2.5-7B-Instruct":
|
||||||
("export MODEL_AEGS='{model}, max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6'\n"
|
("export MODEL_ARGS='pretrained={model}, max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6'\n"
|
||||||
"lm_eval --model vllm --modlel_args $MODEL_ARGS --tasks {datasets} \ \n"
|
"lm_eval --model vllm --modlel_args $MODEL_ARGS --tasks {datasets} \ \n"
|
||||||
"--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
|
"--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
|
||||||
),
|
),
|
||||||
"LLM-Research/Meta-Llama-3.1-8B-Instruct":
|
"Qwen/Qwen3-8B-Base":
|
||||||
("export MODEL_AEGS='{model}, max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6'\n"
|
("export MODEL_ARGS='pretrained={model}, max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6'\n"
|
||||||
"lm_eval --model vllm --modlel_args $MODEL_ARGS --tasks {datasets} \ \n"
|
|
||||||
"--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
|
|
||||||
),
|
|
||||||
"Qwen/Qwen3-8B":
|
|
||||||
("export MODEL_AEGS='{model}, max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6'\n"
|
|
||||||
"lm_eval --model vllm --modlel_args $MODEL_ARGS --tasks {datasets} \ \n"
|
"lm_eval --model vllm --modlel_args $MODEL_ARGS --tasks {datasets} \ \n"
|
||||||
"--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
|
"--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
|
||||||
),
|
),
|
||||||
"Qwen/Qwen2.5-VL-7B-Instruct":
|
"Qwen/Qwen2.5-VL-7B-Instruct":
|
||||||
("export MODEL_AEGS='{model}, max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2'\n"
|
("export MODEL_ARGS='pretrained={model}, max_model_len=8192,dtype=auto,tensor_parallel_size=4,max_images=2'\n"
|
||||||
"lm_eval --model vllm-vlm --modlel_args $MODEL_ARGS --tasks {datasets} \ \n"
|
"lm_eval --model vllm-vlm --modlel_args $MODEL_ARGS --tasks {datasets} \ \n"
|
||||||
"--apply_chat_template --fewshot_as_multiturn --batch_size 1"),
|
"--apply_chat_template --fewshot_as_multiturn --batch_size 1"),
|
||||||
}
|
}
|
||||||
@@ -85,7 +77,7 @@ def run_accuracy_unimodal(queue, model, dataset):
|
|||||||
|
|
||||||
def run_accuracy_multimodal(queue, model, dataset):
|
def run_accuracy_multimodal(queue, model, dataset):
|
||||||
try:
|
try:
|
||||||
model_args = f"pretrained={model},max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2"
|
model_args = f"pretrained={model},max_model_len=8192,dtype=auto,tensor_parallel_size=4,max_images=2"
|
||||||
results = lm_eval.simple_evaluate(
|
results = lm_eval.simple_evaluate(
|
||||||
model="vllm-vlm",
|
model="vllm-vlm",
|
||||||
model_args=model_args,
|
model_args=model_args,
|
||||||
@@ -110,7 +102,7 @@ def generate_md(model_name, tasks_list, args, datasets):
|
|||||||
run_cmd = MODEL_RUN_INFO[model_name].format(model=model_name,
|
run_cmd = MODEL_RUN_INFO[model_name].format(model=model_name,
|
||||||
datasets=datasets)
|
datasets=datasets)
|
||||||
model = model_name.split("/")[1]
|
model = model_name.split("/")[1]
|
||||||
preamble = f"""# {model} Accuracy Test
|
preamble = f"""# 🎯 {model} Accuracy Test
|
||||||
<div>
|
<div>
|
||||||
<strong>vLLM version:</strong> vLLM: {args.vllm_version}, vLLM Ascend: {args.vllm_ascend_version} <br>
|
<strong>vLLM version:</strong> vLLM: {args.vllm_version}, vLLM Ascend: {args.vllm_ascend_version} <br>
|
||||||
</div>
|
</div>
|
||||||
@@ -228,4 +220,7 @@ if __name__ == "__main__":
|
|||||||
parser.add_argument("--vllm_version", type=str, required=False)
|
parser.add_argument("--vllm_version", type=str, required=False)
|
||||||
parser.add_argument("--cann_version", type=str, required=False)
|
parser.add_argument("--cann_version", type=str, required=False)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
# TODO(yikun):
|
||||||
|
# 1. add a exit 1 if accuracy is not as expected
|
||||||
|
# 2. Add ✅, ❌ to markdown if accuracy is not as expected
|
||||||
main(args)
|
main(args)
|
||||||
|
|||||||
Reference in New Issue
Block a user