[CI] Add 310p e2e test back (#5797)

This PR adds the 310p e2e tests back to ensure that related PRs are tested on
310p hardware:
1. For the light e2e workflow, the 310p tests run only when the changed files
   are located under `vllm_ascend/_310p` (see the sketch after this list).
2. For the full e2e workflow, the 310p tests always run.
3. For the main-to-main test, the 310p tests no longer run.
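
A condensed, simplified sketch of how this gating is wired (the job and input
names mirror the diff below, which remains the authoritative definition):

    # Reusable e2e workflow: the caller decides whether the 310p jobs run.
    on:
      workflow_call:
        inputs:
          contains_310:      # light: from the path filter; full: true; main2main: false
            required: true
            type: boolean
    jobs:
      e2e_310p:
        # Skipped entirely unless the caller passes contains_310: true
        if: ${{ inputs.contains_310 }}
        runs-on: linux-aarch64-310p-1
        steps:
          - run: pytest -sv tests/e2e/310p/test_offline_inference_310p.py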

- vLLM version: v0.13.0
- vLLM main: 2f4e6548ef

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Author: wangxiyuan
Date: 2026-01-15 15:47:13 +08:00 (committed by GitHub)
Commit: a25209252f (parent e8bbf72867)
7 changed files with 182 additions and 144 deletions

View File

@@ -15,6 +15,9 @@ on:
      type:
        required: true
        type: string
      contains_310:
        required: true
        type: boolean

jobs:
  e2e:
@@ -328,3 +331,119 @@ jobs:
          # spec_decode
          pytest -sv --durations=0 tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py

  e2e_310p:
    name: 310p singlecard
    runs-on: linux-aarch64-310p-1
    if: ${{ inputs.contains_310 }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
        TRANSFORMERS_OFFLINE: 1
    steps:
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v6
      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev
      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty
          fetch-depth: 1
      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .
      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .
      - name: Run vllm-project/vllm-ascend test
        env:
          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        run: |
          pytest -sv --durations=0 tests/e2e/310p/test_offline_inference_310p.py

  e2e_310p-4cards:
    name: 310p multicards 4cards
    runs-on: linux-aarch64-310p-4
    if: ${{ inputs.contains_310 }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
        TRANSFORMERS_OFFLINE: 1
    steps:
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v6
      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev
      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty
          fetch-depth: 1
      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .
      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .
      - name: Run vllm-project/vllm-ascend test
        env:
          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        run: |
          pytest -sv --durations=0 tests/e2e/310p/test_offline_inference_parallel_310p.py

View File

@@ -1,110 +0,0 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#

name: 310p Labeled Test

on:
  pull_request:
    types: [ labeled ]

# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

# only cancel in-progress runs of the same workflow
# and ignore the lint / 1 card / 4 cards test type
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  e2e:
    # e2e-310p-test will be triggered when tag 'e2e-310p-test' & 'ready-for-test' or schedule job
    if: >-
      ${{
      (contains(github.event.pull_request.labels.*.name, 'e2e-310p-test')) &&
      contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
      github.event_name == 'schedule' || github.event_name == 'push'
      }}
    strategy:
      max-parallel: 2
      matrix:
        os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
        vllm_version: [v0.11.0]
    name: 310p e2e test
    runs-on: ${{ matrix.os }}
    container:
      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
    steps:
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v6
      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2
      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          ref: ${{ matrix.vllm_version }}
          path: ./vllm-empty
      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .
      - name: Install vllm-project/vllm-ascend
        run: |
          export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
          pip install -r requirements-dev.txt
          pip install -v -e .
      - name: Run e2e test
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: True
          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
        run: |
          if [[ "${{ matrix.os }}" == "linux-aarch64-310p-1" ]]; then
            pytest -sv tests/e2e/310p/test_offline_inference_310p.py
          else
            pytest -sv tests/e2e/310p/test_offline_inference_parallel_310p.py
          fi

View File

@@ -83,4 +83,5 @@ jobs:
      vllm: ${{ matrix.vllm_version }}
      runner: linux-aarch64-a2
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
      contains_310: true
      type: full

View File

@@ -47,6 +47,7 @@ jobs:
    outputs:
      e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
      ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
      _310_tracker: ${{ steps.filter.outputs._310_tracker }}
    steps:
      - name: Setup git proxy
        run: |
@@ -73,6 +74,8 @@ jobs:
            ut_tracker:
              - 'tests/ut/**'
              - '.github/workflows/pr_test_light.yaml'
            _310_tracker:
              - 'vllm_ascend/_310p/**'

  ut:
    needs: [lint, changes]
@@ -103,4 +106,5 @@ jobs:
      vllm: ${{ matrix.vllm_version }}
      runner: linux-aarch64-a2
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
      contains_310: ${{ needs.changes.outputs._310_tracker == 'true' }}
      type: light

View File

@@ -36,4 +36,5 @@ jobs:
      vllm: main
      runner: linux-aarch64-a2
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
      contains_310: false
      type: full

View File

@@ -15,58 +15,61 @@
 # limitations under the License.
 # This file is a part of the vllm-ascend project.
 import pytest
 import vllm # noqa: F401
-from vllm import SamplingParams
+from vllm.assets.image import ImageAsset
 import vllm_ascend # noqa: F401
 from tests.e2e.conftest import VllmRunner
-MODELS = ["Qwen/Qwen3-0.6B", "Qwen/Qwen2.5-7B-Instruct"]
-@pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["float16"])
 @pytest.mark.parametrize("max_tokens", [5])
-def test_models(model: str, dtype: str, max_tokens: int) -> None:
+def test_llm_models(dtype: str, max_tokens: int) -> None:
     example_prompts = [
         "Hello, my name is",
         "The future of AI is",
     ]
-    with VllmRunner(model,
+    with VllmRunner("Qwen/Qwen3-0.6B",
                     tensor_parallel_size=1,
                     dtype=dtype,
                     max_model_len=2048,
-                    enforce_eager=True,
-                    compilation_config={
-                        "custom_ops":
-                        ["none", "+rms_norm", "+rotary_embedding"]
-                    }) as vllm_model:
+                    enforce_eager=True) as vllm_model:
         vllm_model.generate_greedy(example_prompts, max_tokens)
+VL_MODELS = ["Qwen/Qwen2.5-VL-3B-Instruct"]
+def test_multimodal_vl():
+    image = ImageAsset("cherry_blossom").pil_image.convert("RGB")
-@pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("dtype", ["float16"])
-def test_vl_model_with_samples(model: str, dtype: str) -> None:
-    example_prompts = [
-        "Hello, my name is",
-        "The future of AI is",
+    img_questions = [
+        "What is the content of this image?",
+        "Describe the content of this image in detail.",
+        "What's in the image?",
+        "Where is this image taken?",
     ]
-    with VllmRunner(model,
-                    tensor_parallel_size=1,
-                    dtype=dtype,
-                    max_model_len=2048,
+    images = [image] * len(img_questions)
+    placeholder = "<|image_pad|>"
+    prompts = [
+        ("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+         f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>"
+         f"{q}<|im_end|>\n<|im_start|>assistant\n") for q in img_questions
+    ]
+    with VllmRunner("Qwen/Qwen2.5-VL-3B-Instruct",
+                    mm_processor_kwargs={
+                        "min_pixels": 28 * 28,
+                        "max_pixels": 1280 * 28 * 28,
+                        "fps": 1,
+                    },
+                    max_model_len=8192,
                     enforce_eager=True,
-                    compilation_config={
-                        "custom_ops":
-                        ["none", "+rms_norm", "+rotary_embedding"]
-                    }) as vllm_model:
-        sampling_params = SamplingParams(max_tokens=100,
-                                         top_p=0.95,
-                                         top_k=50,
-                                         temperature=0.6)
-        vllm_model.generate(example_prompts, sampling_params)
+                    limit_mm_per_prompt={"image": 1}) as vllm_model:
+        outputs = vllm_model.generate_greedy(
+            prompts=prompts,
+            images=images,
+            max_tokens=64,
+        )
+    assert len(outputs) == len(prompts)
+    for _, output_str in outputs:
+        assert output_str, "Generated output should not be empty."

View File

@@ -0,0 +1,20 @@
import pytest

from tests.e2e.conftest import VllmRunner


@pytest.mark.parametrize("dtype", ["float16"])
@pytest.mark.parametrize("max_tokens", [5])
@pytest.mark.skip(reason="310p does not support parallel inference now. Fix me")
def test_models(dtype: str, max_tokens: int) -> None:
    example_prompts = [
        "Hello, my name is",
        "The future of AI is",
    ]
    with VllmRunner("Qwen/Qwen3-0.6B",
                    tensor_parallel_size=4,
                    dtype=dtype,
                    max_model_len=2048,
                    enforce_eager=True) as vllm_model:
        vllm_model.generate_greedy(example_prompts, max_tokens)