[Test] Add e2e test cases for the Qwen-VL model adaptation to Ascend 310p (#6977)

### What this PR does / why we need it?

Add e2e test cases for the Qwen-VL model adaptation to Ascend 310p.

- vLLM version: v0.16.0
- vLLM main: 15d76f74e2

Signed-off-by: gcw_61wqY8cy <wanghengkang1@huawei.com>
2026-03-06 14:25:10 +08:00
parent 620076b76a
commit c49ce18ea5
6 changed files with 141 additions and 2 deletions
--- a/.github/workflows/_e2e_test.yaml
+++ b/.github/workflows/_e2e_test.yaml
@@ -517,7 +517,8 @@ jobs:
          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        run: |
-          pytest -sv --durations=0 tests/e2e/310p/singlecard/test_dense_model_singlecard.py
+          pytest -sv --durations=0 tests/e2e/310p/singlecard/test_dense_model_singlecard.py \
+          tests/e2e/310p/singlecard/test_vl_model_singlecard.py

  e2e_310p-4cards:
    name: 310p multicards 4cards
@@ -577,4 +578,5 @@ jobs:
        run: |
          pytest -sv --durations=0 \
          tests/e2e/310p/multicard/test_dense_model_multicard.py \
-          tests/e2e/310p/multicard/test_moe_model_multicard.py
+          tests/e2e/310p/multicard/test_moe_model_multicard.py \
+          tests/e2e/310p/multicard/test_vl_model_multicard.py
--- a/.github/workflows/pr_test_light.yaml
+++ b/.github/workflows/pr_test_light.yaml
@@ -77,6 +77,7 @@ jobs:
              - '.github/workflows/pr_test_light.yaml'
            _310_tracker:
              - 'vllm_ascend/_310p/**'
+              - 'tests/e2e/310p/**'
              - 'vllm_ascend/worker/model_runner_v1.py'
              - 'vllm_ascend/attention/attention_v1.py'
              - 'vllm_ascend/ops/fused_moe/**'
--- a/tests/e2e/310p/data/qwen.png
+++ b/tests/e2e/310p/data/qwen.png
--- a/tests/e2e/310p/multicard/test_vl_model_multicard.py
+++ b/tests/e2e/310p/multicard/test_vl_model_multicard.py
@@ -0,0 +1,42 @@
+#
+# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+
+import sys
+import os
+
+# "310p" is not importable as a dotted package name; put that directory on sys.path instead
+current_dir = os.path.dirname(os.path.abspath(__file__))  # directory holding this test file
+parent_dir = os.path.dirname(current_dir)  # 310p directory
+sys.path.insert(0, parent_dir)  # index 0: searched before every other sys.path entry
+
+from test_utils import run_vl_model_test
+
+def test_qwen3_vl_8b_tp2_fp16():
+    """Qwen3-VL-8B-Instruct on two cards (TP=2); dtype is the helper's default."""
+    model = "Qwen/Qwen3-VL-8B-Instruct"
+    tp_size, token_budget = 2, 5
+    run_vl_model_test(
+        model_name=model, tensor_parallel_size=tp_size, max_tokens=token_budget
+    )
+
+def test_qwen3_vl_32b_tp1_fp16():
+    """Qwen3-VL-32B-Instruct on four cards (TP=4). NOTE(review): name says tp1, not tp4."""
+    model = "Qwen/Qwen3-VL-32B-Instruct"
+    tp_size, token_budget = 4, 5
+    run_vl_model_test(
+        model_name=model, tensor_parallel_size=tp_size, max_tokens=token_budget
+    )
--- a/tests/e2e/310p/singlecard/test_vl_model_singlecard.py
+++ b/tests/e2e/310p/singlecard/test_vl_model_singlecard.py
@@ -0,0 +1,34 @@
+#
+# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+
+import sys
+import os
+
+# "310p" is not importable as a dotted package name; put that directory on sys.path instead
+current_dir = os.path.dirname(os.path.abspath(__file__))  # directory holding this test file
+parent_dir = os.path.dirname(current_dir)  # 310p directory
+sys.path.insert(0, parent_dir)  # index 0: searched before every other sys.path entry
+
+from test_utils import run_vl_model_test
+
+def test_qwen3_vl_8b_tp1_fp16():
+    """Qwen3-VL-8B-Instruct on a single card (TP=1); dtype is the helper's default."""
+    model = "Qwen/Qwen3-VL-8B-Instruct"
+    tp_size, token_budget = 1, 5
+    run_vl_model_test(
+        model_name=model, tensor_parallel_size=tp_size, max_tokens=token_budget
+    )
--- a/tests/e2e/310p/test_utils.py
+++ b/tests/e2e/310p/test_utils.py
@@ -0,0 +1,60 @@
+#
+# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+
+from tests.e2e.conftest import VllmRunner
+from PIL import Image
+import os
+
+
+def get_test_image():
+    """Return data/qwen.png (next to this module) decoded into memory, leaving no open file."""
+    image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "qwen.png")
+    with Image.open(image_path) as image:  # Image.open is lazy and keeps the file open
+        return image.copy()  # copy() loads the pixel data, so it outlives the file
+
+
+def get_test_prompts():
+    """Return the one-element prompt list: an <|image_pad|> token followed by the request."""
+    return ["<|image_pad|>Describe this image in detail."]
+
+
+def run_vl_model_test(model_name: str,
+                      tensor_parallel_size: int,
+                      max_tokens: int,
+                      dtype: str = "float16",
+                      enforce_eager: bool = True):
+    """
+    Run one greedy image+text generation as a smoke test for a VL model.
+
+    Args:
+        model_name: Model identifier handed to VllmRunner, e.g. "Qwen/Qwen3-VL-8B-Instruct"
+        tensor_parallel_size: Forwarded to VllmRunner as tensor_parallel_size
+        max_tokens: Token limit passed to generate_greedy
+        dtype: Forwarded to VllmRunner; defaults to "float16"
+        enforce_eager: Forwarded to VllmRunner; defaults to True
+
+    The generation output is not inspected; the call only has to complete.
+    """
+    runner_kwargs = dict(
+        tensor_parallel_size=tensor_parallel_size,
+        enforce_eager=enforce_eager,
+        dtype=dtype,
+    )
+    test_images = [get_test_image()]
+    test_prompts = get_test_prompts()
+    with VllmRunner(model_name, **runner_kwargs) as vllm_model:
+        vllm_model.generate_greedy(test_prompts, max_tokens, images=test_images)