xc-llm-ascend/tests/e2e/310p/test_offline_inference_310p.py

#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# Copyright 2023 The vLLM team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
import pytest
from vllm.assets.image import ImageAsset

from tests.e2e.conftest import VllmRunner


@pytest.mark.parametrize("dtype", ["float16"])
@pytest.mark.parametrize("max_tokens", [5])
def test_llm_models(dtype: str, max_tokens: int) -> None:
    """Smoke-test greedy text generation with Qwen/Qwen3-0.6B.

    Loads the model through ``VllmRunner`` in eager mode on a single
    device (``tensor_parallel_size=1``) and greedily generates
    ``max_tokens`` tokens for two fixed prompts.

    Args:
        dtype: Model dtype forwarded to ``VllmRunner``.
        max_tokens: Number of tokens to generate per prompt.
    """
    example_prompts = [
        "Hello, my name is",
        "The future of AI is",
    ]

    with VllmRunner("Qwen/Qwen3-0.6B",
                    tensor_parallel_size=1,
                    dtype=dtype,
                    max_model_len=2048,
                    enforce_eager=True) as vllm_model:
        outputs = vllm_model.generate_greedy(example_prompts, max_tokens)

        # Without these checks the test only fails if generation raises;
        # verify that every prompt actually produced a result.
        assert len(outputs) == len(example_prompts)

        for _, output_str in outputs:
            assert output_str, "Generated output should not be empty."


@pytest.mark.skip(reason="310P: multimodal test skipped, offline is ok")
@pytest.mark.parametrize("dtype", ["float16"])
def test_multimodal_vl(dtype: str):
    """Check image + text generation with Qwen/Qwen2.5-VL-3B-Instruct.

    The same "cherry_blossom" asset image is paired with four different
    questions, each wrapped in a chat-style prompt containing one image
    placeholder. The test passes when greedy generation returns one
    non-empty string per prompt.

    Currently disabled via the ``skip`` marker above.

    Args:
        dtype: Model dtype forwarded to ``VllmRunner``.
    """
    img_questions = [
        "What is the content of this image?",
        "Describe the content of this image in detail.",
        "What's in the image?",
        "Where is this image taken?",
    ]

    # One prompt per question, each carrying a single image placeholder.
    placeholder = "<|image_pad|>"
    prompts = []
    for question in img_questions:
        prompts.append(
            "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
            f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>"
            f"{question}<|im_end|>\n<|im_start|>assistant\n")

    # Every prompt is answered against the same RGB image object.
    image = ImageAsset("cherry_blossom").pil_image.convert("RGB")
    images = [image for _ in img_questions]

    processor_kwargs = {
        "min_pixels": 28 * 28,
        "max_pixels": 1280 * 28 * 28,
        "fps": 1,
    }
    mm_limits = {"image": 1}

    with VllmRunner("Qwen/Qwen2.5-VL-3B-Instruct",
                    mm_processor_kwargs=processor_kwargs,
                    dtype=dtype,
                    max_model_len=8192,
                    enforce_eager=True,
                    limit_mm_per_prompt=mm_limits) as vllm_model:
        outputs = vllm_model.generate_greedy(prompts=prompts,
                                             images=images,
                                             max_tokens=64)

        assert len(outputs) == len(prompts)

        for _, generated_text in outputs:
            assert generated_text, "Generated output should not be empty."
[CI]Add e2e test for 310p (#1879) ### What this PR does / why we need it? Add e2e test for 310p: trigger conditions：tag, labels(ready-for-test, e2e-310p-test), schedule image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-310p-ubuntu22.04-py3.10 runner: linux-aarch64-310p-1, linux-aarch64-310p-4 model: IntervitensInc/pangu-pro-moe-model, Qwen/Qwen3-0.6B-Base, Qwen/Qwen2.5-7B-Instruct - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/b917da442b820245f537602d752e7146e66dd37a Signed-off-by: hfadzxy <starmoon_zhang@163.com> 2025-07-30 14:52:16 +08:00			`#`
			`# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.`
			`# Copyright 2023 The vLLM team.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`# This file is a part of the vllm-ascend project.`
			`import pytest`
[CI] Add 310p e2e test back (#5797) This PR add 310 e2e test back to ensure the related PR will be tested on 310. 1. for light e2e, we'll run 310p test if the changed files are located in `vllm_ascend/_310p` 2. for full e2e, we'll always run 310p test 3. for main2main test, we'll stop run 310p test - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> 2026-01-15 15:47:13 +08:00			`from vllm.assets.image import ImageAsset`
[CI]Add e2e test for 310p (#1879) ### What this PR does / why we need it? Add e2e test for 310p: trigger conditions：tag, labels(ready-for-test, e2e-310p-test), schedule image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-310p-ubuntu22.04-py3.10 runner: linux-aarch64-310p-1, linux-aarch64-310p-4 model: IntervitensInc/pangu-pro-moe-model, Qwen/Qwen3-0.6B-Base, Qwen/Qwen2.5-7B-Instruct - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/b917da442b820245f537602d752e7146e66dd37a Signed-off-by: hfadzxy <starmoon_zhang@163.com> 2025-07-30 14:52:16 +08:00
			`from tests.e2e.conftest import VllmRunner`


			`@pytest.mark.parametrize("dtype", ["float16"])`
			`@pytest.mark.parametrize("max_tokens", [5])`
[CI] Add 310p e2e test back (#5797) This PR add 310 e2e test back to ensure the related PR will be tested on 310. 1. for light e2e, we'll run 310p test if the changed files are located in `vllm_ascend/_310p` 2. for full e2e, we'll always run 310p test 3. for main2main test, we'll stop run 310p test - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> 2026-01-15 15:47:13 +08:00			`def test_llm_models(dtype: str, max_tokens: int) -> None:`
[CI]Add e2e test for 310p (#1879) ### What this PR does / why we need it? Add e2e test for 310p: trigger conditions：tag, labels(ready-for-test, e2e-310p-test), schedule image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-310p-ubuntu22.04-py3.10 runner: linux-aarch64-310p-1, linux-aarch64-310p-4 model: IntervitensInc/pangu-pro-moe-model, Qwen/Qwen3-0.6B-Base, Qwen/Qwen2.5-7B-Instruct - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/b917da442b820245f537602d752e7146e66dd37a Signed-off-by: hfadzxy <starmoon_zhang@163.com> 2025-07-30 14:52:16 +08:00			`example_prompts = [`
			`"Hello, my name is",`
			`"The future of AI is",`
			`]`

[CI] Add 310p e2e test back (#5797) This PR add 310 e2e test back to ensure the related PR will be tested on 310. 1. for light e2e, we'll run 310p test if the changed files are located in `vllm_ascend/_310p` 2. for full e2e, we'll always run 310p test 3. for main2main test, we'll stop run 310p test - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> 2026-01-15 15:47:13 +08:00			`with VllmRunner("Qwen/Qwen3-0.6B",`
[CI]Add e2e test for 310p (#1879) ### What this PR does / why we need it? Add e2e test for 310p: trigger conditions：tag, labels(ready-for-test, e2e-310p-test), schedule image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-310p-ubuntu22.04-py3.10 runner: linux-aarch64-310p-1, linux-aarch64-310p-4 model: IntervitensInc/pangu-pro-moe-model, Qwen/Qwen3-0.6B-Base, Qwen/Qwen2.5-7B-Instruct - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/b917da442b820245f537602d752e7146e66dd37a Signed-off-by: hfadzxy <starmoon_zhang@163.com> 2025-07-30 14:52:16 +08:00			`tensor_parallel_size=1,`
			`dtype=dtype,`
			`max_model_len=2048,`
[CI] Add 310p e2e test back (#5797) This PR add 310 e2e test back to ensure the related PR will be tested on 310. 1. for light e2e, we'll run 310p test if the changed files are located in `vllm_ascend/_310p` 2. for full e2e, we'll always run 310p test 3. for main2main test, we'll stop run 310p test - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> 2026-01-15 15:47:13 +08:00			`enforce_eager=True) as vllm_model:`
[CI]Add e2e test for 310p (#1879) ### What this PR does / why we need it? Add e2e test for 310p: trigger conditions：tag, labels(ready-for-test, e2e-310p-test), schedule image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-310p-ubuntu22.04-py3.10 runner: linux-aarch64-310p-1, linux-aarch64-310p-4 model: IntervitensInc/pangu-pro-moe-model, Qwen/Qwen3-0.6B-Base, Qwen/Qwen2.5-7B-Instruct - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/b917da442b820245f537602d752e7146e66dd37a Signed-off-by: hfadzxy <starmoon_zhang@163.com> 2025-07-30 14:52:16 +08:00			`vllm_model.generate_greedy(example_prompts, max_tokens)`
Add qwen-vl model and sampling feature UT for 310I series (#2168) ### What this PR does / why we need it? Add qwen-vl model and sampling feature UT for 310I series - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/e0f63e4a3509a9323339eee67c96ac3c93d15923 Signed-off-by: leo-pony <nengjunma@outlook.com> 2025-08-02 11:26:12 +08:00

[Feature]: Support 310P device run qwen2.5/3 dense and qwen2.5vl models (#5776) ### What this PR does / why we need it? Add basic 310p support. Only dense models work with eager mode now. - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d --------- Signed-off-by: Tflowers-0129 <2906339855@qq.com> Signed-off-by: Shaoxu Cheng <2906339855@qq.com> 2026-01-17 11:49:18 +08:00			`@pytest.mark.skip(reason="310P: multimodal test skipped, offline is ok")`
			`@pytest.mark.parametrize("dtype", ["float16"])`
			`def test_multimodal_vl(dtype: str):`
[CI] Add 310p e2e test back (#5797) This PR add 310 e2e test back to ensure the related PR will be tested on 310. 1. for light e2e, we'll run 310p test if the changed files are located in `vllm_ascend/_310p` 2. for full e2e, we'll always run 310p test 3. for main2main test, we'll stop run 310p test - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> 2026-01-15 15:47:13 +08:00			`image = ImageAsset("cherry_blossom").pil_image.convert("RGB")`
Add qwen-vl model and sampling feature UT for 310I series (#2168) ### What this PR does / why we need it? Add qwen-vl model and sampling feature UT for 310I series - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/e0f63e4a3509a9323339eee67c96ac3c93d15923 Signed-off-by: leo-pony <nengjunma@outlook.com> 2025-08-02 11:26:12 +08:00
[CI] Add 310p e2e test back (#5797) This PR add 310 e2e test back to ensure the related PR will be tested on 310. 1. for light e2e, we'll run 310p test if the changed files are located in `vllm_ascend/_310p` 2. for full e2e, we'll always run 310p test 3. for main2main test, we'll stop run 310p test - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> 2026-01-15 15:47:13 +08:00			`img_questions = [`
			`"What is the content of this image?",`
			`"Describe the content of this image in detail.",`
			`"What's in the image?",`
			`"Where is this image taken?",`
			`]`
Add qwen-vl model and sampling feature UT for 310I series (#2168) ### What this PR does / why we need it? Add qwen-vl model and sampling feature UT for 310I series - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/e0f63e4a3509a9323339eee67c96ac3c93d15923 Signed-off-by: leo-pony <nengjunma@outlook.com> 2025-08-02 11:26:12 +08:00
[CI] Add 310p e2e test back (#5797) This PR add 310 e2e test back to ensure the related PR will be tested on 310. 1. for light e2e, we'll run 310p test if the changed files are located in `vllm_ascend/_310p` 2. for full e2e, we'll always run 310p test 3. for main2main test, we'll stop run 310p test - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> 2026-01-15 15:47:13 +08:00			`images = [image] * len(img_questions)`
			`placeholder = "<\|image_pad\|>"`
			`prompts = [`
			`("<\|im_start\|>system\nYou are a helpful assistant.<\|im_end\|>\n"`
			`f"<\|im_start\|>user\n<\|vision_start\|>{placeholder}<\|vision_end\|>"`
			`f"{q}<\|im_end\|>\n<\|im_start\|>assistant\n") for q in img_questions`
Add qwen-vl model and sampling feature UT for 310I series (#2168) ### What this PR does / why we need it? Add qwen-vl model and sampling feature UT for 310I series - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/e0f63e4a3509a9323339eee67c96ac3c93d15923 Signed-off-by: leo-pony <nengjunma@outlook.com> 2025-08-02 11:26:12 +08:00			`]`

[CI] Add 310p e2e test back (#5797) This PR add 310 e2e test back to ensure the related PR will be tested on 310. 1. for light e2e, we'll run 310p test if the changed files are located in `vllm_ascend/_310p` 2. for full e2e, we'll always run 310p test 3. for main2main test, we'll stop run 310p test - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> 2026-01-15 15:47:13 +08:00			`with VllmRunner("Qwen/Qwen2.5-VL-3B-Instruct",`
			`mm_processor_kwargs={`
			`"min_pixels": 28 * 28,`
			`"max_pixels": 1280 * 28 * 28,`
			`"fps": 1,`
			`},`
[Feature]: Support 310P device run qwen2.5/3 dense and qwen2.5vl models (#5776) ### What this PR does / why we need it? Add basic 310p support. Only dense models work with eager mode now. - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d --------- Signed-off-by: Tflowers-0129 <2906339855@qq.com> Signed-off-by: Shaoxu Cheng <2906339855@qq.com> 2026-01-17 11:49:18 +08:00			`dtype=dtype,`
[CI] Add 310p e2e test back (#5797) This PR add 310 e2e test back to ensure the related PR will be tested on 310. 1. for light e2e, we'll run 310p test if the changed files are located in `vllm_ascend/_310p` 2. for full e2e, we'll always run 310p test 3. for main2main test, we'll stop run 310p test - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> 2026-01-15 15:47:13 +08:00			`max_model_len=8192,`
Add qwen-vl model and sampling feature UT for 310I series (#2168) ### What this PR does / why we need it? Add qwen-vl model and sampling feature UT for 310I series - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/e0f63e4a3509a9323339eee67c96ac3c93d15923 Signed-off-by: leo-pony <nengjunma@outlook.com> 2025-08-02 11:26:12 +08:00			`enforce_eager=True,`
[CI] Add 310p e2e test back (#5797) This PR add 310 e2e test back to ensure the related PR will be tested on 310. 1. for light e2e, we'll run 310p test if the changed files are located in `vllm_ascend/_310p` 2. for full e2e, we'll always run 310p test 3. for main2main test, we'll stop run 310p test - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2f4e6548efec402b913ffddc8726230d9311948d Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> 2026-01-15 15:47:13 +08:00			`limit_mm_per_prompt={"image": 1}) as vllm_model:`
			`outputs = vllm_model.generate_greedy(`
			`prompts=prompts,`
			`images=images,`
			`max_tokens=64,`
			`)`

			`assert len(outputs) == len(prompts)`

			`for _, output_str in outputs:`
			`assert output_str, "Generated output should not be empty."`