xc-llm-ascend/.github/workflows/vllm_ascend_test.yaml
wangxiyuan fef18b60bc Refactor e2e CI (#2276)
Refactor E2E CI to make it clearer and faster:
1. Remove some useless e2e tests
2. Remove some useless functions
3. Make sure all tests run with VLLMRunner to avoid OOM errors
4. Make sure all ops tests end with torch.empty_cache to avoid OOM errors (see the sketch below)
5. Run the tests one by one to avoid resource limit errors


- vLLM version: v0.10.1.1
- vLLM main: a344a5aa0a

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-09-02 09:02:22 +08:00
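
For point 4, a minimal sketch of what the after-test cleanup could look like as an autouse pytest fixture (hypothetical: the real tests may call the cleanup inline, and the commit's torch.empty_cache shorthand presumably corresponds to torch.npu.empty_cache() from torch_npu):

# conftest.py -- hypothetical fixture, not the project's actual code
import pytest
import torch

@pytest.fixture(autouse=True)
def release_device_cache():
    yield  # run the test body first
    npu = getattr(torch, "npu", None)  # torch.npu exists only when torch_npu is installed
    if npu is not None:
        npu.empty_cache()  # return cached device memory so later tests don't hit OOM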


#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#
name: 'test'
on:
  push:
    branches:
      - 'main'
  pull_request:
    branches:
      - 'main'
      - '*-dev'
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}
# only cancel in-progress runs of the same workflow and ref;
# the group key does not distinguish the lint / single-card / multi-card job types
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  lint:
    uses: ./.github/workflows/pre-commit.yml
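  # Detect which paths the change touches so the test jobs below run only when relevant files change.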
  changes:
    runs-on: ubuntu-latest
    outputs:
      e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
      ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
    steps:
      - uses: actions/checkout@v4
      - uses: dorny/paths-filter@v3
        id: filter
        with:
          filters: |
            e2e_tracker:
              - '.github/workflows/vllm_ascend_test.yaml'
              - 'vllm_ascend/**'
              - 'csrc/**'
              - 'cmake/**'
              - 'tests/e2e/**'
              - 'CMakeLists.txt'
              - 'setup.py'
              - 'requirements.txt'
              - 'requirements-dev.txt'
              - 'requirements-lint.txt'
              - 'packages.txt'
            ut_tracker:
              - 'tests/ut/**'
  ut:
    needs: [lint, changes]
    name: unit test
    # only trigger the unit tests after lint passes and the change is e2e- or ut-related.
    if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
    runs-on: ubuntu-latest
    container:
      image: quay.io/ascend/cann:8.2.rc1-910b-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
    strategy:
      matrix:
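        # exercise both the pinned vLLM release and the latest main branch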
        vllm_version: [v0.10.1.1, main]
    steps:
      - name: Install packages
        run: |
          apt-get update -y
          apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2
      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ matrix.vllm_version }}
          path: ./vllm-empty
      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
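          # VLLM_TARGET_DEVICE=empty installs vLLM without building any device kernels; vllm-ascend provides the NPU backend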
          VLLM_TARGET_DEVICE=empty python3 -m pip install . --extra-index https://download.pytorch.org/whl/cpu/
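          # triton is pulled in as a GPU kernel compiler and is not used on Ascend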
          python3 -m pip uninstall -y triton
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4
      - name: Install vllm-project/vllm-ascend
        run: |
          export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
          python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
          python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
      - name: Run unit test
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
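          # keep PyTorch from auto-loading the out-of-tree NPU backend; the unit tests run on CPU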
          TORCH_DEVICE_BACKEND_AUTOLOAD: 0
        run: |
          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
          pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut
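      # upload coverage only once per run, from the vLLM main matrix entry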
      - name: Upload coverage to Codecov
        if: ${{ matrix.vllm_version == 'main' }}
        uses: codecov/codecov-action@v5
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
        with:
          flags: unittests
          name: vllm-ascend
          verbose: true
  e2e:
    needs: [lint, changes]
    # only trigger the e2e test on pull requests, after lint passes and the change is e2e-related.
    if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' }}
    strategy:
      max-parallel: 2
      matrix:
        os: [linux-aarch64-a2-1]
        vllm_version: [v0.10.1.1, main]
    name: singlecard e2e test
    runs-on: ${{ matrix.os }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
    steps:
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
      - name: Config mirrors
        run: |
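          # point apt and pip at the in-cluster cache service for faster, more reliable installs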
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4
      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev
      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ matrix.vllm_version }}
          path: ./vllm-empty
      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .
      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .
      - name: Run e2e test
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
          # Running the aclgraph tests in one batch triggers an AclmdlRICaptureBegin error,
          # so we run each test file separately.
          pytest -sv tests/e2e/singlecard/test_aclgraph.py
          pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
          pytest -sv tests/e2e/singlecard/test_camem.py
          pytest -sv tests/e2e/singlecard/test_chunked.py
          pytest -sv tests/e2e/singlecard/test_embedding.py
          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
          # TODO: fix lora accuracy error
          # pytest -sv tests/e2e/singlecard/test_ilama_lora.py
          pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
          pytest -sv tests/e2e/singlecard/test_quantization.py
          pytest -sv tests/e2e/singlecard/test_sampler.py
          pytest -sv tests/e2e/singlecard/test_vlm.py
          # ------------------------------------ v1 spec decode test ------------------------------------ #
          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
          # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
          pytest -sv tests/e2e/singlecard/ops/
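  # the multicard suite runs only after the singlecard suite has passed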
  e2e-2-cards:
    needs: [e2e]
    if: ${{ needs.e2e.result == 'success' }}
    strategy:
      max-parallel: 2
      matrix:
        os: [linux-aarch64-a2-2]
        vllm_version: [v0.10.1.1, main]
    name: multicard e2e test
    runs-on: ${{ matrix.os }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
    steps:
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4
      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev
      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ matrix.vllm_version }}
          path: ./vllm-empty
      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .
      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .
      - name: Run vllm-project/vllm-ascend test
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
        run: |
          pytest -sv tests/e2e/multicard/test_data_parallel.py
          pytest -sv tests/e2e/multicard/test_expert_parallel.py
          # the external_launcher test is not stable enough; fix it later
          # pytest -sv tests/e2e/multicard/test_external_launcher.py
          pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
          # pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
          # To avoid OOM, run each test in its own process.
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
          # pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_pangu
          # pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W8A8
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Qwen3_W4A8DYNAMIC
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_sp_for_qwen3_moe
          # pytest -sv tests/e2e/multicard/test_pipeline_parallel.py
          # pytest -sv tests/e2e/multicard/test_prefix_caching.py
          # pytest -sv tests/e2e/multicard/test_qwen3_moe.py
          # pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py