From 6874d666facae495c3907115581f1a4dea5d9744 Mon Sep 17 00:00:00 2001
From: zhangxinyuehfad <59153331+zhangxinyuehfad@users.noreply.github.com>
Date: Wed, 30 Jul 2025 14:52:16 +0800
Subject: [PATCH] [CI]Add e2e test for 310p (#1879)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What this PR does / why we need it?
Add e2e test for 310p:

trigger conditions: tag, labels (ready-for-test, e2e-310p-test), schedule
image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-310p-ubuntu22.04-py3.10
runner: linux-aarch64-310p-1, linux-aarch64-310p-4
model: IntervitensInc/pangu-pro-moe-model, Qwen/Qwen3-0.6B-Base, Qwen/Qwen2.5-7B-Instruct

- vLLM version: v0.10.0
- vLLM main: https://github.com/vllm-project/vllm/commit/b917da442b820245f537602d752e7146e66dd37a

Signed-off-by: hfadzxy
---
 .github/actionlint.yaml                       |   3 +
 .github/workflows/vllm_ascend_test.yaml       |   6 +-
 .github/workflows/vllm_ascend_test_310p.yaml  | 117 ++++++++++++++++++
 .../multicard/test_offline_inference_310p.py  |  62 ++++++++++
 .../singlecard/test_offline_inference_310p.py |  44 +++++++
 5 files changed, 230 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/vllm_ascend_test_310p.yaml
 create mode 100644 tests/e2e/multicard/test_offline_inference_310p.py
 create mode 100644 tests/e2e/singlecard/test_offline_inference_310p.py

diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml
index 3b4d23f..312a7ad 100644
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@@ -7,4 +7,7 @@ self-hosted-runner:
     - linux-aarch64-a2-4
     - linux-aarch64-a2-8
     - linux-arm64-npu-static-8
+    - linux-aarch64-310p-1
+    - linux-aarch64-310p-2
+    - linux-aarch64-310p-4
     - ubuntu-24.04-arm
diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
index 580559c..517acfc 100644
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -203,7 +203,8 @@ jobs:
           --ignore=tests/e2e/singlecard/test_camem.py \
           --ignore=tests/e2e/singlecard/test_embedding.py \
           --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py \
-          --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
+          --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py \
+          --ignore=tests/e2e/singlecard/test_offline_inference_310p.py
           # ------------------------------------ v1 spec decode test ------------------------------------ #
           VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
           # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
@@ -280,4 +281,5 @@ jobs:
           pytest -sv tests/e2e/multicard/test_data_parallel.py
           pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py \
           --ignore=tests/e2e/multicard/test_offline_inference_distributed.py \
-          --ignore=tests/e2e/multicard/test_data_parallel.py
+          --ignore=tests/e2e/multicard/test_data_parallel.py \
+          --ignore=tests/e2e/multicard/test_offline_inference_310p.py
diff --git a/.github/workflows/vllm_ascend_test_310p.yaml b/.github/workflows/vllm_ascend_test_310p.yaml
new file mode 100644
index 0000000..5c339f5
--- /dev/null
+++ b/.github/workflows/vllm_ascend_test_310p.yaml
@@ -0,0 +1,117 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+name: 'e2e test / 310p-test'
+
+on:
+  push:
+    tags:
+      - 'v*'
+  schedule:
+    # Runs every 6 hours
+    - cron: '0 */6 * * *'
+  pull_request:
+    types: [ labeled ]
+
+# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
+# declared as "shell: bash -el {0}" on steps that need to be properly activated.
+# It's used to activate ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+# only cancel in-progress runs of the same workflow
+# and ignore the lint / 1 card / 4 cards test type
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  e2e:
+    # e2e-310p-test will be triggered when label 'e2e-310p-test' & 'ready-for-test' or schedule job
+    if: >-
+      ${{
+      (contains(github.event.pull_request.labels.*.name, 'e2e-310p-test')) &&
+      contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
+      github.event_name == 'schedule' || github.event_name == 'push'
+      }}
+    strategy:
+      max-parallel: 2
+      matrix:
+        os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
+        vllm_version: [main, v0.10.0]
+    name: 310p e2e test
+    runs-on: ${{ matrix.os }}
+    container:
+      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.1.rc1-310p-ubuntu22.04-py3.10
+      env:
+        VLLM_LOGGING_LEVEL: ERROR
+        VLLM_USE_MODELSCOPE: True
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      - name: Config mirrors
+        run: |
+          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
+          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
+          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
+          apt-get update -y
+          apt install git -y
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install system dependencies
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ matrix.vllm_version }}
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        run: |
+          export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
+          export SOC_VERSION=ASCEND310P3
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+
+      - name: Run e2e test
+        env:
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          VLLM_USE_MODELSCOPE: True
+          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
+        run: |
+          if [[ "${{ matrix.os }}" == "linux-aarch64-310p-1" ]]; then
+            pytest -sv tests/e2e/singlecard/test_offline_inference_310p.py
+          else
+            pytest -sv tests/e2e/multicard/test_offline_inference_310p.py
+          fi
\ No newline at end of file
diff --git a/tests/e2e/multicard/test_offline_inference_310p.py b/tests/e2e/multicard/test_offline_inference_310p.py
new file mode 100644
index 0000000..6bf3356
--- /dev/null
+++ b/tests/e2e/multicard/test_offline_inference_310p.py
@@ -0,0 +1,62 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+import pytest
+import vllm  # noqa: F401
+
+import vllm_ascend  # noqa: F401
+from tests.e2e.conftest import VllmRunner
+
+# Pangu local model path
+MODELS = [
+    "IntervitensInc/pangu-pro-moe-model",
+]
+# set additional config for ascend scheduler and torchair graph
+ADDITIONAL_CONFIG = [{
+    "additional_config": {
+        "torchair_graph_config": {
+            "enabled": True
+        },
+        "ascend_scheduler_config": {
+            "enabled": True,
+        }
+    }
+}]
+
+
+@pytest.mark.parametrize("model", MODELS)
+@pytest.mark.parametrize("dtype", ["float16"])
+@pytest.mark.parametrize("max_tokens", [5])
+@pytest.mark.parametrize("enforce_eager", [True, False])
+@pytest.mark.parametrize("additional_config", ADDITIONAL_CONFIG)
+def test_pangu_model(model: str, dtype: str, max_tokens: int,
+                     enforce_eager: bool, additional_config: dict) -> None:
+    if enforce_eager:
+        additional_config = {}
+    example_prompts = [
+        "Hello, my name is",
+        "The future of AI is",
+    ]
+
+    with VllmRunner(model,
+                    tensor_parallel_size=4,
+                    dtype=dtype,
+                    max_model_len=1024,
+                    enforce_eager=True,
+                    enable_expert_parallel=True,
+                    additional_config=additional_config,
+                    distributed_executor_backend="mp") as vllm_model:
+        vllm_model.generate_greedy(example_prompts, max_tokens)
diff --git a/tests/e2e/singlecard/test_offline_inference_310p.py b/tests/e2e/singlecard/test_offline_inference_310p.py
new file mode 100644
index 0000000..a003c5f
--- /dev/null
+++ b/tests/e2e/singlecard/test_offline_inference_310p.py
@@ -0,0 +1,44 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+import pytest
+import vllm  # noqa: F401
+
+import vllm_ascend  # noqa: F401
+from tests.e2e.conftest import VllmRunner
+
+MODELS = ["Qwen/Qwen3-0.6B-Base", "Qwen/Qwen2.5-7B-Instruct"]
+
+
+@pytest.mark.parametrize("model", MODELS)
+@pytest.mark.parametrize("dtype", ["float16"])
+@pytest.mark.parametrize("max_tokens", [5])
+def test_models(model: str, dtype: str, max_tokens: int) -> None:
+    example_prompts = [
+        "Hello, my name is",
+        "The future of AI is",
+    ]
+
+    with VllmRunner(model,
+                    tensor_parallel_size=1,
+                    dtype=dtype,
+                    max_model_len=2048,
+                    enforce_eager=True,
+                    compilation_config={
+                        "custom_ops":
+                        ["none", "+rms_norm", "+rotary_embedding"]
+                    }) as vllm_model:
+        vllm_model.generate_greedy(example_prompts, max_tokens)