From a25209252f4b83465303623acd79c6b3ba6ed226 Mon Sep 17 00:00:00 2001
From: wangxiyuan
Date: Thu, 15 Jan 2026 15:47:13 +0800
Subject: [PATCH] [CI] Add 310p e2e test back (#5797)

This PR adds the 310p e2e test back to ensure that related PRs are tested on 310p.

1. For the light e2e, the 310p test runs only if the changed files are located in `vllm_ascend/_310p`.
2. For the full e2e, the 310p test always runs.
3. For the main2main test, the 310p test no longer runs.

- vLLM version: v0.13.0
- vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d

Signed-off-by: wangxiyuan
---
 .github/workflows/_e2e_test.yaml              | 119 ++++++++++++++++++
 .github/workflows/labled_test_310.yaml        | 110 ----------------
 .github/workflows/pr_test_full.yaml           |   1 +
 .github/workflows/pr_test_light.yaml          |   4 +
 .../workflows/schedule_test_vllm_main.yaml    |   1 +
 tests/e2e/310p/test_offline_inference_310p.py |  71 ++++++-----
 .../test_offline_inference_parallel_310p.py   |  20 +++
 7 files changed, 182 insertions(+), 144 deletions(-)
 delete mode 100644 .github/workflows/labled_test_310.yaml
 create mode 100644 tests/e2e/310p/test_offline_inference_parallel_310p.py

diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
index 84117eda..ed744973 100644
--- a/.github/workflows/_e2e_test.yaml
+++ b/.github/workflows/_e2e_test.yaml
@@ -15,6 +15,9 @@ on:
       type:
         required: true
         type: string
+      contains_310:
+        required: true
+        type: boolean
 
 jobs:
   e2e:
@@ -328,3 +331,119 @@
 
           # spec_decode
           pytest -sv --durations=0 tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py
+
+  e2e_310p:
+    name: 310p singlecard
+    runs-on: linux-aarch64-310p-1
+    if: ${{ inputs.contains_310 }}
+    container:
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11
+      env:
+        VLLM_LOGGING_LEVEL: ERROR
+        VLLM_USE_MODELSCOPE: True
+        TRANSFORMERS_OFFLINE: 1
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+      - name: Config mirrors
+        run: |
+          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
+          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
+          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
+          apt-get update -y
+          apt install git -y
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v6
+
+      - name: Install system dependencies
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v6
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm }}
+          path: ./vllm-empty
+          fetch-depth: 1
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        env:
+          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -v -e .
+ + - name: Run vllm-project/vllm-ascend test + env: + PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 + VLLM_WORKER_MULTIPROC_METHOD: spawn + run: | + pytest -sv --durations=0 tests/e2e/310p/test_offline_inference_310p.py + + e2e_310p-4cards: + name: 310p multicards 4cards + runs-on: linux-aarch64-310p-4 + if: ${{ inputs.contains_310 }} + container: + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11 + env: + VLLM_LOGGING_LEVEL: ERROR + VLLM_USE_MODELSCOPE: True + TRANSFORMERS_OFFLINE: 1 + steps: + - name: Check npu and CANN info + run: | + npu-smi info + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + - name: Config mirrors + run: | + sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list + pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple + pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local + apt-get update -y + apt install git -y + + - name: Checkout vllm-project/vllm-ascend repo + uses: actions/checkout@v6 + + - name: Install system dependencies + run: | + apt-get -y install `cat packages.txt` + apt-get -y install gcc g++ cmake libnuma-dev + + - name: Checkout vllm-project/vllm repo + uses: actions/checkout@v6 + with: + repository: vllm-project/vllm + ref: ${{ inputs.vllm }} + path: ./vllm-empty + fetch-depth: 1 + + - name: Install vllm-project/vllm from source + working-directory: ./vllm-empty + run: | + VLLM_TARGET_DEVICE=empty pip install -e . + + - name: Install vllm-project/vllm-ascend + env: + PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi + run: | + pip install -r requirements-dev.txt + pip install -v -e . + + - name: Run vllm-project/vllm-ascend test + env: + PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 + VLLM_WORKER_MULTIPROC_METHOD: spawn + run: | + pytest -sv --durations=0 tests/e2e/310p/test_offline_inference_parallel_310p.py diff --git a/.github/workflows/labled_test_310.yaml b/.github/workflows/labled_test_310.yaml deleted file mode 100644 index acd06346..00000000 --- a/.github/workflows/labled_test_310.yaml +++ /dev/null @@ -1,110 +0,0 @@ -# -# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# This file is a part of the vllm-ascend project. -# - -name: 310p Labeled Test - -on: - pull_request: - types: [ labeled ] - -# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly -# declared as "shell: bash -el {0}" on steps that need to be properly activated. -# It's used to activate ascend-toolkit environment variables. 
-defaults: - run: - shell: bash -el {0} - -# only cancel in-progress runs of the same workflow -# and ignore the lint / 1 card / 4 cards test type -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - e2e: - # e2e-310p-test will be triggered when tag 'e2e-310p-test' & 'ready-for-test' or schedule job - if: >- - ${{ - (contains(github.event.pull_request.labels.*.name, 'e2e-310p-test')) && - contains(github.event.pull_request.labels.*.name, 'ready-for-test') || - github.event_name == 'schedule' || github.event_name == 'push' - }} - strategy: - max-parallel: 2 - matrix: - os: [linux-aarch64-310p-1, linux-aarch64-310p-4] - vllm_version: [v0.11.0] - name: 310p e2e test - runs-on: ${{ matrix.os }} - container: - # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11 - env: - VLLM_LOGGING_LEVEL: ERROR - VLLM_USE_MODELSCOPE: True - steps: - - name: Check npu and CANN info - run: | - npu-smi info - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - - - name: Config mirrors - run: | - sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list - pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple - pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local - apt-get update -y - apt install git -y - - - name: Checkout vllm-project/vllm-ascend repo - uses: actions/checkout@v6 - - - name: Install system dependencies - run: | - apt-get -y install `cat packages.txt` - apt-get -y install git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2 - - - name: Checkout vllm-project/vllm repo - uses: actions/checkout@v6 - with: - repository: vllm-project/vllm - ref: ${{ matrix.vllm_version }} - path: ./vllm-empty - - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty - run: | - VLLM_TARGET_DEVICE=empty pip install -e . - - - name: Install vllm-project/vllm-ascend - run: | - export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib - pip install -r requirements-dev.txt - pip install -v -e . 
- - - name: Run e2e test - env: - VLLM_WORKER_MULTIPROC_METHOD: spawn - VLLM_USE_MODELSCOPE: True - PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 - run: | - if [[ "${{ matrix.os }}" == "linux-aarch64-310p-1" ]]; then - pytest -sv tests/e2e/310p/test_offline_inference_310p.py - else - pytest -sv tests/e2e/310p/test_offline_inference_parallel_310p.py - fi diff --git a/.github/workflows/pr_test_full.yaml b/.github/workflows/pr_test_full.yaml index 9d26c2ac..16461015 100644 --- a/.github/workflows/pr_test_full.yaml +++ b/.github/workflows/pr_test_full.yaml @@ -83,4 +83,5 @@ jobs: vllm: ${{ matrix.vllm_version }} runner: linux-aarch64-a2 image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 + contains_310: true type: full diff --git a/.github/workflows/pr_test_light.yaml b/.github/workflows/pr_test_light.yaml index 405d1ba6..3180f561 100644 --- a/.github/workflows/pr_test_light.yaml +++ b/.github/workflows/pr_test_light.yaml @@ -47,6 +47,7 @@ jobs: outputs: e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }} ut_tracker: ${{ steps.filter.outputs.ut_tracker }} + _310_tracker: ${{ steps.filter.outputs._310_tracker }} steps: - name: Setup git proxy run: | @@ -73,6 +74,8 @@ jobs: ut_tracker: - 'tests/ut/**' - '.github/workflows/pr_test_light.yaml' + _310_tracker: + - 'vllm_ascend/_310p/**' ut: needs: [lint, changes] @@ -103,4 +106,5 @@ jobs: vllm: ${{ matrix.vllm_version }} runner: linux-aarch64-a2 image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 + contains_310: ${{ needs.changes.outputs._310_tracker == 'true' }} type: light diff --git a/.github/workflows/schedule_test_vllm_main.yaml b/.github/workflows/schedule_test_vllm_main.yaml index 7f8c7876..c233d25d 100644 --- a/.github/workflows/schedule_test_vllm_main.yaml +++ b/.github/workflows/schedule_test_vllm_main.yaml @@ -36,4 +36,5 @@ jobs: vllm: main runner: linux-aarch64-a2 image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 + contains_310: false type: full diff --git a/tests/e2e/310p/test_offline_inference_310p.py b/tests/e2e/310p/test_offline_inference_310p.py index 31f7eb92..188865f6 100644 --- a/tests/e2e/310p/test_offline_inference_310p.py +++ b/tests/e2e/310p/test_offline_inference_310p.py @@ -15,58 +15,61 @@ # limitations under the License. # This file is a part of the vllm-ascend project. 
 import pytest
-import vllm  # noqa: F401
-from vllm import SamplingParams
+from vllm.assets.image import ImageAsset
 
-import vllm_ascend  # noqa: F401
 from tests.e2e.conftest import VllmRunner
 
-MODELS = ["Qwen/Qwen3-0.6B", "Qwen/Qwen2.5-7B-Instruct"]
 
-
-@pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["float16"])
 @pytest.mark.parametrize("max_tokens", [5])
-def test_models(model: str, dtype: str, max_tokens: int) -> None:
+def test_llm_models(dtype: str, max_tokens: int) -> None:
     example_prompts = [
         "Hello, my name is",
         "The future of AI is",
     ]
 
-    with VllmRunner(model,
+    with VllmRunner("Qwen/Qwen3-0.6B",
                     tensor_parallel_size=1,
                     dtype=dtype,
                     max_model_len=2048,
-                    enforce_eager=True,
-                    compilation_config={
-                        "custom_ops":
-                        ["none", "+rms_norm", "+rotary_embedding"]
-                    }) as vllm_model:
+                    enforce_eager=True) as vllm_model:
         vllm_model.generate_greedy(example_prompts, max_tokens)
 
 
-VL_MODELS = ["Qwen/Qwen2.5-VL-3B-Instruct"]
+def test_multimodal_vl():
+    image = ImageAsset("cherry_blossom").pil_image.convert("RGB")
 
-
-@pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("dtype", ["float16"])
-def test_vl_model_with_samples(model: str, dtype: str) -> None:
-    example_prompts = [
-        "Hello, my name is",
-        "The future of AI is",
+    img_questions = [
+        "What is the content of this image?",
+        "Describe the content of this image in detail.",
+        "What's in the image?",
+        "Where is this image taken?",
     ]
+    images = [image] * len(img_questions)
+    placeholder = "<|image_pad|>"
+    prompts = [
+        ("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+         f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>"
+         f"{q}<|im_end|>\n<|im_start|>assistant\n") for q in img_questions
+    ]
 
-    with VllmRunner(model,
-                    tensor_parallel_size=1,
-                    dtype=dtype,
-                    max_model_len=2048,
+    with VllmRunner("Qwen/Qwen2.5-VL-3B-Instruct",
+                    mm_processor_kwargs={
+                        "min_pixels": 28 * 28,
+                        "max_pixels": 1280 * 28 * 28,
+                        "fps": 1,
+                    },
+                    max_model_len=8192,
                     enforce_eager=True,
-                    compilation_config={
-                        "custom_ops":
-                        ["none", "+rms_norm", "+rotary_embedding"]
-                    }) as vllm_model:
-        sampling_params = SamplingParams(max_tokens=100,
-                                         top_p=0.95,
-                                         top_k=50,
-                                         temperature=0.6)
-        vllm_model.generate(example_prompts, sampling_params)
+                    limit_mm_per_prompt={"image": 1}) as vllm_model:
+        outputs = vllm_model.generate_greedy(
+            prompts=prompts,
+            images=images,
+            max_tokens=64,
+        )
+
+    assert len(outputs) == len(prompts)
+
+    for _, output_str in outputs:
+        assert output_str, "Generated output should not be empty."
diff --git a/tests/e2e/310p/test_offline_inference_parallel_310p.py b/tests/e2e/310p/test_offline_inference_parallel_310p.py
new file mode 100644
index 00000000..c6467d60
--- /dev/null
+++ b/tests/e2e/310p/test_offline_inference_parallel_310p.py
@@ -0,0 +1,20 @@
+import pytest
+
+from tests.e2e.conftest import VllmRunner
+
+
+@pytest.mark.parametrize("dtype", ["float16"])
+@pytest.mark.parametrize("max_tokens", [5])
+@pytest.mark.skip(reason="310p does not support parallel inference yet. Fix me")
+def test_models(dtype: str, max_tokens: int) -> None:
+    example_prompts = [
+        "Hello, my name is",
+        "The future of AI is",
+    ]
+
+    with VllmRunner("Qwen/Qwen3-0.6B",
+                    tensor_parallel_size=4,
+                    dtype=dtype,
+                    max_model_len=2048,
+                    enforce_eager=True) as vllm_model:
+        vllm_model.generate_greedy(example_prompts, max_tokens)
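
Note on the wiring: the gating described in the commit message comes together across three pieces shown above: the `_310_tracker` path filter in `pr_test_light.yaml`, the new `contains_310` boolean input on the reusable `_e2e_test.yaml`, and the `if: ${{ inputs.contains_310 }}` condition on both 310p jobs. The sketch below is a minimal illustration only; the `changes`/`e2e-light` job names, the `ubuntu-latest` runner, and the `dorny/paths-filter@v3` action are assumptions (the patch only shows a step with id `filter`), while the filter expression and the `with:` inputs are taken from the hunks above.

# Sketch only: how a caller forwards the paths-filter result into _e2e_test.yaml.
jobs:
  changes:
    runs-on: ubuntu-latest            # assumed runner for the filter job
    outputs:
      _310_tracker: ${{ steps.filter.outputs._310_tracker }}
    steps:
      - id: filter
        uses: dorny/paths-filter@v3   # assumed action; the patch only shows the step id `filter`
        with:
          filters: |
            _310_tracker:
              - 'vllm_ascend/_310p/**'

  e2e-light:                          # assumed job name; calls the reusable workflow patched above
    needs: changes
    uses: ./.github/workflows/_e2e_test.yaml
    with:
      vllm: v0.13.0
      runner: linux-aarch64-a2
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
      # Light e2e: run the 310p jobs only when files under vllm_ascend/_310p changed.
      contains_310: ${{ needs.changes.outputs._310_tracker == 'true' }}
      type: light

In this sketch the light caller forwards the filter result, while `pr_test_full.yaml` hard-codes `contains_310: true` and `schedule_test_vllm_main.yaml` hard-codes `false`, matching points 1-3 of the commit message.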