diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml index 7ab5c3f8..fb84bd72 100644 --- a/.github/workflows/_e2e_test.yaml +++ b/.github/workflows/_e2e_test.yaml @@ -517,7 +517,8 @@ jobs: PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 VLLM_WORKER_MULTIPROC_METHOD: spawn run: | - pytest -sv --durations=0 tests/e2e/310p/singlecard/test_dense_model_singlecard.py + pytest -sv --durations=0 tests/e2e/310p/singlecard/test_dense_model_singlecard.py \ + tests/e2e/310p/singlecard/test_vl_model_singlecard.py e2e_310p-4cards: name: 310p multicards 4cards @@ -577,4 +578,5 @@ jobs: run: | pytest -sv --durations=0 \ tests/e2e/310p/multicard/test_dense_model_multicard.py \ - tests/e2e/310p/multicard/test_moe_model_multicard.py + tests/e2e/310p/multicard/test_moe_model_multicard.py \ + tests/e2e/310p/multicard/test_vl_model_multicard.py diff --git a/.github/workflows/pr_test_light.yaml b/.github/workflows/pr_test_light.yaml index d3c9a881..bb98dfe3 100644 --- a/.github/workflows/pr_test_light.yaml +++ b/.github/workflows/pr_test_light.yaml @@ -77,6 +77,7 @@ jobs: - '.github/workflows/pr_test_light.yaml' _310_tracker: - 'vllm_ascend/_310p/**' + - 'tests/e2e/310p/**' - 'vllm_ascend/worker/model_runner_v1.py' - 'vllm_ascend/attention/attention_v1.py' - 'vllm_ascend/ops/fused_moe/**' diff --git a/tests/e2e/310p/data/qwen.png b/tests/e2e/310p/data/qwen.png new file mode 100644 index 00000000..49be9893 Binary files /dev/null and b/tests/e2e/310p/data/qwen.png differ diff --git a/tests/e2e/310p/multicard/test_vl_model_multicard.py b/tests/e2e/310p/multicard/test_vl_model_multicard.py new file mode 100644 index 00000000..0016ea86 --- /dev/null +++ b/tests/e2e/310p/multicard/test_vl_model_multicard.py @@ -0,0 +1,42 @@ +# +# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved. +# Copyright 2023 The vLLM team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. + +import sys +import os + +# Add 310p directory to sys.path +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(current_dir) # 310p directory +sys.path.insert(0, parent_dir) + +from test_utils import run_vl_model_test + +def test_qwen3_vl_8b_tp2_fp16(): + """Qwen3-VL-8B dual-card FP16 test""" + run_vl_model_test( + model_name="Qwen/Qwen3-VL-8B-Instruct", + tensor_parallel_size=2, + max_tokens=5 + ) + +def test_qwen3_vl_32b_tp4_fp16(): + """Qwen3-VL-32B 4-card FP16 test""" + run_vl_model_test( + model_name="Qwen/Qwen3-VL-32B-Instruct", + tensor_parallel_size=4, + max_tokens=5 + ) \ No newline at end of file diff --git a/tests/e2e/310p/singlecard/test_vl_model_singlecard.py b/tests/e2e/310p/singlecard/test_vl_model_singlecard.py new file mode 100644 index 00000000..380116c7 --- /dev/null +++ b/tests/e2e/310p/singlecard/test_vl_model_singlecard.py @@ -0,0 +1,34 @@ +# +# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved. +# Copyright 2023 The vLLM team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. + +import sys +import os + +# Add 310p directory to sys.path +current_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(current_dir) # 310p directory +sys.path.insert(0, parent_dir) + +from test_utils import run_vl_model_test + +def test_qwen3_vl_8b_tp1_fp16(): + """Qwen3-VL-8B single-card FP16 test""" + run_vl_model_test( + model_name="Qwen/Qwen3-VL-8B-Instruct", + tensor_parallel_size=1, + max_tokens=5 + ) \ No newline at end of file diff --git a/tests/e2e/310p/test_utils.py b/tests/e2e/310p/test_utils.py new file mode 100644 index 00000000..f9c521fd --- /dev/null +++ b/tests/e2e/310p/test_utils.py @@ -0,0 +1,60 @@ +# +# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved. +# Copyright 2023 The vLLM team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. 
+ +from tests.e2e.conftest import VllmRunner +from PIL import Image +import os + + +def get_test_image(): + """Get the image object for testing""" + current_dir = os.path.dirname(os.path.abspath(__file__)) + image_path = os.path.join(current_dir, "data", "qwen.png") + return Image.open(image_path) + + +def get_test_prompts(): + """Get the prompts for testing""" + return ["<|image_pad|>Describe this image in detail."] + + +def run_vl_model_test(model_name: str, + tensor_parallel_size: int, + max_tokens: int, + dtype: str = "float16", + enforce_eager: bool = True): + """ + Generic visual language model test function + + Args: + model_name: Model name, e.g., "Qwen/Qwen3-VL-4B" + tensor_parallel_size: Tensor parallel size + max_tokens: Maximum number of generated tokens + dtype: Data type, default is float16 + enforce_eager: Whether to enforce eager mode + """ + image = get_test_image() + images = [image] + prompts = get_test_prompts() + + with VllmRunner( + model_name, + tensor_parallel_size=tensor_parallel_size, + enforce_eager=enforce_eager, + dtype=dtype + ) as vllm_model: + vllm_model.generate_greedy(prompts, max_tokens, images=images) \ No newline at end of file