From f99762eb254807335419cd04945359eaa5213713 Mon Sep 17 00:00:00 2001
From: Canlin Guo
Date: Fri, 31 Oct 2025 15:42:47 +0800
Subject: [PATCH] [E2E][MM] Add e2e tests for InternVL model (#3796)

### What this PR does / why we need it?
As validation for #3664, add end-to-end tests that monitor the InternVL
models and ensure they continue to work correctly. This PR covers
single-card tests only; models with more than 8B parameters (e.g., 78B)
still need to be tested on multiple cards.

### Does this PR introduce _any_ user-facing change?
None.

### How was this patch tested?
`pytest -sv tests/e2e/singlecard/multi-modal/test_internvl.py`
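To check a single model without running the whole suite, one parametrized
case can be selected by its pytest ID (a sketch of standard pytest usage;
the exact IDs are derived from the `MODELS` entries, so listing them with
`--collect-only` first is the safe way to confirm them):

```bash
# List the generated test IDs (pytest's "test_name[param]" form)
pytest --collect-only -q tests/e2e/singlecard/multi-modal/test_internvl.py

# Run only the InternVL2-8B case
pytest -sv "tests/e2e/singlecard/multi-modal/test_internvl.py::test_internvl_basic[OpenGVLab/InternVL2-8B]"
```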
- vLLM version: v0.11.0
- vLLM main: https://github.com/vllm-project/vllm/commit/83f478bb19489b41e9d208b47b4bb5a95ac171ac

---------

Signed-off-by: gcanlin
---
 .github/workflows/_e2e_test.yaml              |  1 +
 .../singlecard/multi-modal/test_internvl.py   | 97 +++++++++++++++++++
 2 files changed, 98 insertions(+)
 create mode 100644 tests/e2e/singlecard/multi-modal/test_internvl.py

diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
index 8d383cff..cb2582e9 100644
--- a/.github/workflows/_e2e_test.yaml
+++ b/.github/workflows/_e2e_test.yaml
@@ -103,6 +103,7 @@ jobs:
           pytest -sv tests/e2e/singlecard/test_quantization.py
           pytest -sv tests/e2e/singlecard/test_sampler.py
           pytest -sv tests/e2e/singlecard/test_vlm.py
+          pytest -sv tests/e2e/singlecard/multi-modal/test_internvl.py

           # ------------------------------------ v1 spec decode test ------------------------------------ #
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
diff --git a/tests/e2e/singlecard/multi-modal/test_internvl.py b/tests/e2e/singlecard/multi-modal/test_internvl.py
new file mode 100644
index 00000000..7cf32e4d
--- /dev/null
+++ b/tests/e2e/singlecard/multi-modal/test_internvl.py
@@ -0,0 +1,97 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+import os
+
+# Set the spawn method before any torch/NPU imports to avoid fork issues
+os.environ.setdefault('VLLM_WORKER_MULTIPROC_METHOD', 'spawn')
+
+import pytest
+from vllm.assets.image import ImageAsset
+
+from tests.e2e.conftest import VllmRunner
+from tests.e2e.model_utils import check_outputs_equal
+from vllm_ascend.utils import vllm_version_is
+
+MODELS = [
+    "OpenGVLab/InternVL2-8B",
+    "OpenGVLab/InternVL2_5-8B",
+    "OpenGVLab/InternVL3-8B",
+    "OpenGVLab/InternVL3_5-8B",
+]
+
+# Skip InternVL3-8B and InternVL3_5-8B on vLLM 0.11.0 due to
+# https://github.com/vllm-project/vllm-ascend/issues/3925.
+if vllm_version_is("0.11.0"):
+    MODELS = [
+        "OpenGVLab/InternVL2-8B",
+        "OpenGVLab/InternVL2_5-8B",
+    ]
+
+
+@pytest.mark.parametrize("model", MODELS)
+def test_internvl_basic(model: str):
+    """Test basic InternVL inference with a single image."""
+    # Load the test image
+    image = ImageAsset("cherry_blossom").pil_image.convert("RGB")
+
+    # InternVL uses a chat template of the form:
+    # <|im_start|>user\n<image>\nQUESTION<|im_end|>\n<|im_start|>assistant\n
+    questions = [
+        "What is the content of this image?",
+        "Describe this image in detail.",
+    ]
+
+    # Build prompts with the InternVL chat template
+    prompts = [
+        f"<|im_start|>user\n<image>\n{q}<|im_end|>\n<|im_start|>assistant\n"
+        for q in questions
+    ]
+    images = [image] * len(prompts)
+
+    outputs = {}
+    # enforce_eager=True disables graph capture, i.e. runs eager mode
+    for enforce_eager, mode in [(True, "eager"), (False, "graph")]:
+        with VllmRunner(
+                model,
+                max_model_len=8192,
+                limit_mm_per_prompt={"image": 4},
+                enforce_eager=enforce_eager,
+                dtype="bfloat16",
+        ) as vllm_model:
+            generated_outputs = vllm_model.generate_greedy(
+                prompts=prompts,
+                images=images,
+                max_tokens=128,
+            )
+
+        assert len(generated_outputs) == len(prompts), \
+            f"Expected {len(prompts)} outputs, got {len(generated_outputs)} in {mode} mode"
+
+        for i, (_, output_str) in enumerate(generated_outputs):
+            assert output_str, \
+                f"{mode.capitalize()} mode output {i} should not be empty. Prompt: {prompts[i]}"
+            assert len(output_str.strip()) > 0, \
+                f"{mode.capitalize()} mode output {i} should have meaningful content"
+
+        outputs[mode] = generated_outputs
+
+    eager_outputs = outputs["eager"]
+    graph_outputs = outputs["graph"]
+
+    # Greedy outputs should match between eager and graph execution
+    check_outputs_equal(outputs_0_lst=eager_outputs,
+                        outputs_1_lst=graph_outputs,
+                        name_0="eager mode",
+                        name_1="graph mode")