[CI] Add unit test framework (#1201)

This PR adds a unit test framework to enable UT for vLLM Ascend. Unit
tests run on CPU machines. They are run once the lint check has passed,
the same as the e2e tests.

For unit tests, this PR creates a new folder called `ut` under the `tests`
module. The layout of the test files in `ut` should mirror the layout of
the code in `vllm-ascend`. Each file name should start with the `test_`
prefix. For example, in this PR `test_ascend_config.py` is added to test
`ascend_config.py`.

A new file `worker/test_worker_v1.py` is also added as a placeholder.
This file should hold the unit tests for `vllm-ascend/worker/worker_v1.py`.

Additionally, a new `fake_weight` folder is added. It contains the
config.json from `facebook/opt-125m`, so that the tests do not need to
visit Hugging Face every time.

TODO:
We should add all the unit test files one by one in the future.

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-06-16 18:32:28 +08:00
committed by GitHub
parent 966557a2a3
commit 69b817ed65
57 changed files with 396 additions and 267 deletions

View File

@@ -15,7 +15,7 @@
# This file is a part of the vllm-ascend project.
#
name: 'e2e test / basic'
name: 'test'
on:
schedule:
@@ -114,6 +114,56 @@ jobs:
echo "::add-matcher::.github/workflows/matchers/mypy.json"
tools/mypy.sh 1 ${{ matrix.python-version }}
# Unit-test job: installs vLLM and vllm-ascend inside a CANN container on an
# ubuntu-latest runner and runs `pytest -sv tests/ut`, once per vLLM version
# listed in the matrix.
ut:
# Gated on the lint job: declared as a dependency and required to have succeeded.
needs: [lint]
name: unit test
if: ${{ needs.lint.result == 'success' }}
runs-on: ubuntu-latest
container:
image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
# Job-level environment, visible to every step below.
env:
VLLM_LOGGING_LEVEL: ERROR
VLLM_USE_MODELSCOPE: True
strategy:
matrix:
# Each entry is used as the git ref for the vllm-project/vllm checkout step.
vllm_version: [main, v0.9.1]
steps:
# System packages (pip, git, compilers, cmake, libnuma) installed via apt inside the container.
- name: Install packages
run: |
apt-get update -y
apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev
# Upstream vLLM is checked out into ./vllm-empty at the matrix-selected ref.
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
ref: ${{ matrix.vllm_version }}
path: ./vllm-empty
# Installs vLLM with VLLM_TARGET_DEVICE=empty, using the PyTorch CPU wheel index
# as an extra package index, then uninstalls triton.
# NOTE(review): the reason for removing triton is not stated here - confirm.
- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty python3 -m pip install . --extra-index https://download.pytorch.org/whl/cpu/
python3 -m pip uninstall -y triton
# No `with:` block, so this checks out the repository the workflow runs in.
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
# Appends the Ascend toolkit devlib directory to LD_LIBRARY_PATH before installing
# the dev requirements and the package itself.
# NOTE(review): presumably devlib lets the build link without a real NPU driver - confirm.
- name: Install vllm-project/vllm-ascend
run: |
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
# Runs everything under tests/ut with the V1 engine selected via VLLM_USE_V1.
# LD_LIBRARY_PATH is exported again because an `export` in an earlier step's
# shell does not carry over to this step.
# NOTE(review): TORCH_DEVICE_BACKEND_AUTOLOAD=0 presumably stops torch from
# auto-loading the device backend plugin on import - confirm.
- name: Run unit test for V1 Engine
env:
VLLM_USE_V1: 1
VLLM_WORKER_MULTIPROC_METHOD: spawn
TORCH_DEVICE_BACKEND_AUTOLOAD: 0
run: |
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
pytest -sv tests/ut
e2e:
needs: [lint]
if: ${{ needs.lint.result == 'success' }}
@@ -122,7 +172,7 @@ jobs:
matrix:
os: [linux-arm64-npu-1]
vllm_version: [main, v0.9.1]
name: vLLM Ascend test
name: singlecard e2e test
runs-on: ${{ matrix.os }}
container:
# TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
@@ -168,53 +218,47 @@ jobs:
pip install -r requirements-dev.txt
pip install -v -e .
- name: Run vllm-project/vllm-ascend test for V1 Engine
- name: Run e2e test for V1 Engine
env:
VLLM_USE_V1: 1
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
run: |
pytest -sv tests/singlecard/test_offline_inference.py
pytest -sv tests/e2e/singlecard/test_offline_inference.py
# TODO: switch hf to modelscope
VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
pytest -sv tests/singlecard/test_ilama_lora.py
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
# TODO(sss): guided decoding doesn't work, fix it later
# pytest -sv tests/singlecard/test_guided_decoding.py
# test_ascend_config.py should be ran separately because it will regenerate the global config many times.
pytest -sv tests/singlecard/test_ascend_config.py
pytest -sv tests/singlecard/test_camem.py
pytest -sv tests/singlecard/ \
--ignore=tests/singlecard/test_offline_inference.py \
--ignore=tests/singlecard/test_ilama_lora.py \
--ignore=tests/singlecard/test_guided_decoding.py \
--ignore=tests/singlecard/test_ascend_config.py \
--ignore=tests/singlecard/test_camem.py
# pytest -sv tests/e2e/singlecard/test_guided_decoding.py
pytest -sv tests/e2e/singlecard/test_camem.py
pytest -sv tests/e2e/singlecard/ \
--ignore=tests/e2e/singlecard/test_offline_inference.py \
--ignore=tests/e2e/singlecard/test_ilama_lora.py \
--ignore=tests/e2e/singlecard/test_guided_decoding.py \
--ignore=tests/e2e/singlecard/test_camem.py
- name: Run vllm-project/vllm-ascend test on V0 engine
- name: Run e2e test on V0 engine
if: ${{ github.event_name == 'schedule' }}
env:
VLLM_USE_V1: 0
VLLM_USE_MODELSCOPE: True
run: |
pytest -sv tests/singlecard/test_offline_inference.py
pytest -sv tests/e2e/singlecard/test_offline_inference.py
# TODO: switch hf to modelscope
VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
pytest -sv tests/singlecard/test_ilama_lora.py
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
# guided decoding doesn't work, fix it later
# pytest -sv tests/singlecard/test_guided_decoding.py
pytest -sv tests/singlecard/test_camem.py
# test_ascend_config.py should be ran separately because it will regenerate the global config many times.
pytest -sv tests/singlecard/test_ascend_config.py
pytest -sv tests/singlecard/test_prompt_embedding.py
pytest -sv tests/singlecard/ \
--ignore=tests/singlecard/test_offline_inference.py \
--ignore=tests/singlecard/test_ilama_lora.py \
--ignore=tests/singlecard/test_guided_decoding.py \
--ignore=tests/singlecard/test_camem.py \
--ignore=tests/singlecard/test_ascend_config.py \
--ignore=tests/singlecard/test_prompt_embedding.py \
--ignore=tests/singlecard/core/test_ascend_scheduler.py \
--ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
# pytest -sv tests/e2e/singlecard/test_guided_decoding.py
pytest -sv tests/e2e/singlecard/test_camem.py
pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
pytest -sv tests/e2e/singlecard/ \
--ignore=tests/e2e/singlecard/test_offline_inference.py \
--ignore=tests/e2e/singlecard/test_ilama_lora.py \
--ignore=tests/e2e/singlecard/test_guided_decoding.py \
--ignore=tests/e2e/singlecard/test_camem.py \
--ignore=tests/e2e/singlecard/test_prompt_embedding.py \
--ignore=tests/e2e/singlecard/core/test_ascend_scheduler.py \
--ignore=tests/e2e/singlecard/core/test_ascend_scheduler_e2e.py
e2e-4-cards:
needs: [e2e]
@@ -224,7 +268,7 @@ jobs:
matrix:
os: [linux-arm64-npu-4]
vllm_version: [main, v0.9.1]
name: vLLM Ascend test
name: multicard e2e test
runs-on: ${{ matrix.os }}
container:
# TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
@@ -279,14 +323,14 @@ jobs:
run: |
# TODO: switch hf to modelscope
VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
pytest -sv tests/multicard/test_ilama_lora_tp2.py
# Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
# Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
# To avoid oom, we need to run the test in a single process.
pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py
- name: Run vllm-project/vllm-ascend test on V0 engine
if: ${{ github.event_name == 'schedule' }}
@@ -296,11 +340,11 @@ jobs:
run: |
# TODO: switch hf to modelscope
VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
pytest -sv tests/multicard/test_ilama_lora_tp2.py
# Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
# Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
# To avoid oom, we need to run the test in a single process.
pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py

View File

@@ -96,12 +96,12 @@ jobs:
run: |
if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
# spec decode test
VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
# TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
# VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_v1_spec_decode.py
VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_mtp_correctness.py # it needs a clean process
pytest -sv tests/long_term/spec_decode --ignore=tests/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/long_term/spec_decode/e2e/test_v1_spec_decode.py --ignore=tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
pytest -sv tests/long_term/test_accuracy.py
# VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_v1_spec_decode.py
VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_mtp_correctness.py # it needs a clean process
pytest -sv tests/e2e/long_term/spec_decode --ignore=tests/e2e/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/e2e/long_term/spec_decode/e2e/test_v1_spec_decode.py --ignore=tests/e2e/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
pytest -sv tests/e2e/long_term/test_accuracy.py
else
VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/test_deepseek_v2_lite_tp2_accuracy.py
VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/test_deepseek_v2_lite_tp2_accuracy.py
fi