[Test] Add e2e test cases for the Qwen-VL model adaptation to Ascend 310p (#6977)

### What this PR does / why we need it? Add e2e test cases for the Qwen-VL model adaptation to Ascend 310p - vLLM version: v0.16.0 - vLLM main: 15d76f74e2 Signed-off-by: gcw_61wqY8cy <wanghengkang1@huawei.com>
2026-03-06 14:25:10 +08:00
parent 620076b76a
commit c49ce18ea5
6 changed files with 141 additions and 2 deletions
--- a/.github/workflows/_e2e_test.yaml
+++ b/.github/workflows/_e2e_test.yaml
@@ -517,7 +517,8 @@ jobs:
          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        run: |
-          pytest -sv --durations=0 tests/e2e/310p/singlecard/test_dense_model_singlecard.py
+          pytest -sv --durations=0 tests/e2e/310p/singlecard/test_dense_model_singlecard.py \
          tests/e2e/310p/singlecard/test_vl_model_singlecard.py
  e2e_310p-4cards:
    name: 310p multicards 4cards
@@ -577,4 +578,5 @@ jobs:
        run: |
          pytest -sv --durations=0 \
          tests/e2e/310p/multicard/test_dense_model_multicard.py \
-          tests/e2e/310p/multicard/test_moe_model_multicard.py
+          tests/e2e/310p/multicard/test_moe_model_multicard.py \
          tests/e2e/310p/multicard/test_vl_model_multicard.py
--- a/.github/workflows/pr_test_light.yaml
+++ b/.github/workflows/pr_test_light.yaml
@@ -77,6 +77,7 @@ jobs:
              - '.github/workflows/pr_test_light.yaml'
            _310_tracker:
              - 'vllm_ascend/_310p/**'
              - 'tests/e2e/310p/**'
              - 'vllm_ascend/worker/model_runner_v1.py'
              - 'vllm_ascend/attention/attention_v1.py'
              - 'vllm_ascend/ops/fused_moe/**'
--- a/tests/e2e/310p/data/qwen.png
+++ b/tests/e2e/310p/data/qwen.png
--- a/tests/e2e/310p/multicard/test_vl_model_multicard.py
+++ b/tests/e2e/310p/multicard/test_vl_model_multicard.py
@@ -0,0 +1,42 @@
 #
 # Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
 # Copyright 2023 The vLLM team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # This file is a part of the vllm-ascend project.
 import sys
 import os
 # The shared helper module test_utils.py lives one level up, in the 310p
 # directory. "310p" is not a valid Python identifier, so it cannot appear in
 # a dotted package import; instead that directory is put at the front of
 # sys.path and the module is imported by its bare name.
 current_dir = os.path.dirname(os.path.abspath(__file__))
 parent_dir = os.path.dirname(current_dir)  # 310p directory
 sys.path.insert(0, parent_dir)
 # Must come after the sys.path change above, otherwise test_utils is not found.
 from test_utils import run_vl_model_test
 def test_qwen3_vl_8b_tp2_fp16():
    """Smoke-test Qwen3-VL-8B-Instruct with tensor parallelism across 2 cards.

    Delegates to run_vl_model_test, asking for 5 generated tokens and leaving
    the remaining options at that helper's defaults.
    """
    model = "Qwen/Qwen3-VL-8B-Instruct"
    run_vl_model_test(model_name=model, tensor_parallel_size=2, max_tokens=5)
 def test_qwen3_vl_32b_tp1_fp16():
    """Smoke-test Qwen3-VL-32B-Instruct with tensor parallelism across 4 cards.

    Delegates to run_vl_model_test, asking for 5 generated tokens and leaving
    the remaining options at that helper's defaults.
    """
    # NOTE(review): the function name says "tp1" but the run uses
    # tensor_parallel_size=4; consider renaming to "..._tp4_fp16".
    model = "Qwen/Qwen3-VL-32B-Instruct"
    run_vl_model_test(model_name=model, tensor_parallel_size=4, max_tokens=5)
--- a/tests/e2e/310p/singlecard/test_vl_model_singlecard.py
+++ b/tests/e2e/310p/singlecard/test_vl_model_singlecard.py
@@ -0,0 +1,34 @@
 #
 # Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
 # Copyright 2023 The vLLM team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # This file is a part of the vllm-ascend project.
 import sys
 import os
 # The shared helper module test_utils.py lives one level up, in the 310p
 # directory. "310p" is not a valid Python identifier, so it cannot appear in
 # a dotted package import; instead that directory is put at the front of
 # sys.path and the module is imported by its bare name.
 current_dir = os.path.dirname(os.path.abspath(__file__))
 parent_dir = os.path.dirname(current_dir)  # 310p directory
 sys.path.insert(0, parent_dir)
 # Must come after the sys.path change above, otherwise test_utils is not found.
 from test_utils import run_vl_model_test
 def test_qwen3_vl_8b_tp1_fp16():
    """Smoke-test Qwen3-VL-8B-Instruct on a single card (tensor parallel size 1).

    Delegates to run_vl_model_test, asking for 5 generated tokens and leaving
    the remaining options at that helper's defaults.
    """
    model = "Qwen/Qwen3-VL-8B-Instruct"
    run_vl_model_test(model_name=model, tensor_parallel_size=1, max_tokens=5)
--- a/tests/e2e/310p/test_utils.py
+++ b/tests/e2e/310p/test_utils.py
@@ -0,0 +1,60 @@
 #
 # Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
 # Copyright 2023 The vLLM team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # This file is a part of the vllm-ascend project.
 from tests.e2e.conftest import VllmRunner
 from PIL import Image
 import os
 def get_test_image():
    """Load the bundled test picture and return it as a PIL image.

    The picture is ``data/qwen.png`` relative to the directory containing
    this module.

    ``Image.open`` is lazy: it only reads the header and keeps the file open
    until the pixel data is needed. The image is therefore loaded eagerly
    inside a ``with`` block, so the file handle is released before returning
    while the returned image object stays fully usable.

    Returns:
        The loaded ``PIL.Image.Image`` for ``data/qwen.png``.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    image_path = os.path.join(current_dir, "data", "qwen.png")
    with Image.open(image_path) as image:
        # Pull the pixel data into memory now; leaving the with-block closes
        # only the file pointer, not the decoded image.
        image.load()
    return image
 def get_test_prompts():
    """Return the list of text prompts used by the VL tests.

    A new single-element list is built on every call.
    """
    describe_prompt = "<|image_pad|>Describe this image in detail."
    return [describe_prompt]
 def run_vl_model_test(
        model_name: str,
        tensor_parallel_size: int,
        max_tokens: int,
        dtype: str = "float16",
        enforce_eager: bool = True):
    """Run one greedy generation of a vision-language model on the test image.

    The bundled test image and the test prompt are fed to the model through
    VllmRunner.generate_greedy. The generated output is discarded and no
    assertion is made on it, so the check is simply that generation completes
    without raising.

    Args:
        model_name: Model name, e.g., "Qwen/Qwen3-VL-4B".
        tensor_parallel_size: Tensor parallel size passed to VllmRunner.
        max_tokens: Maximum number of tokens to generate.
        dtype: Data type passed to VllmRunner; defaults to "float16".
        enforce_eager: Whether to enforce eager mode; defaults to True.
    """
    images = [get_test_image()]
    prompts = get_test_prompts()
    runner_options = {
        "tensor_parallel_size": tensor_parallel_size,
        "enforce_eager": enforce_eager,
        "dtype": dtype,
    }
    with VllmRunner(model_name, **runner_options) as vllm_model:
        vllm_model.generate_greedy(prompts, max_tokens, images=images)