diff --git a/tests/ut/base.py b/tests/ut/base.py
index 6bdf1f4..065e68e 100644
--- a/tests/ut/base.py
+++ b/tests/ut/base.py
@@ -15,8 +15,14 @@
 import unittest
 
+import pytest
+
 from vllm_ascend.utils import adapt_patch, register_ascend_customop
 
+# The fused MoE ops test hits an infer_schema error, so the patch is
+# imported here to make the test pass.
+import vllm_ascend.patch.worker.patch_common.patch_utils  # type: ignore[import]  # isort: skip  # noqa
+
 
 class TestBase(unittest.TestCase):
 
@@ -27,3 +33,16 @@ class TestBase(unittest.TestCase):
         register_ascend_customop()
         super().setUp()
         super(TestBase, self).__init__(*args, **kwargs)
+
+
+class PytestBase:
+    """Base class for pytest-based tests.
+    pytest's mocker and parametrize features are not compatible with
+    unittest, so pytest-based tests use this separate base class.
+    """
+
+    @pytest.fixture(autouse=True)
+    def setup(self):
+        adapt_patch(True)
+        adapt_patch()
+        register_ascend_customop()
diff --git a/tests/ut/models/__init__.py b/tests/ut/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/ut/models/test_qwen2_5_vl_without_padding.py b/tests/ut/models/test_qwen2_5_vl_without_padding.py
new file mode 100644
index 0000000..0ae1afa
--- /dev/null
+++ b/tests/ut/models/test_qwen2_5_vl_without_padding.py
@@ -0,0 +1,392 @@
+import pytest
+import torch
+import torch.nn.functional as F
+from pytest_mock import MockerFixture
+
+from tests.ut.base import PytestBase
+from vllm_ascend.models.qwen2_5_vl_without_padding import (
+    AscendQwen2_5_VisionAttention_Without_Padding,
+    AscendQwen2_5_VisionBlock_Without_Padding,
+    AscendQwen2_5_VisionPatchEmbed_Without_Padding,
+    AscendQwen2_5_VisionTransformer_Without_Padding,
+    AscendQwen2_5_VLForConditionalGeneration_Without_Padding)
+
+
+class TestAscendQwen2_5_VisionAttention_Without_Padding(PytestBase):
+
+    def init_attention(
+        self,
+        mocker,
+        embed_dim=1000,
+        num_heads=10,
+        projection_size=100,
+        quant_config=None,
+        prefix="",
+    ):
+        mocker_attn = mocker.patch(
+            "vllm_ascend.models.qwen2_5_vl_without_padding.Qwen2_5_VisionAttention.__init__"
+        )
+
+        attention = AscendQwen2_5_VisionAttention_Without_Padding(
+            embed_dim=embed_dim,
+            num_heads=num_heads,
+            projection_size=projection_size,
+            quant_config=quant_config,
+            prefix=prefix,
+        )
+        args, kwargs = mocker_attn.call_args
+        assert args == (embed_dim, num_heads, projection_size, None, "")
+        assert not kwargs
+        attention.num_attention_heads_per_partition = num_heads
+        return attention
+
+    def test_vit_init_should_normal(self, mocker: MockerFixture):
+        embed_dim = 1000
+        num_heads = 10
+        projection_size = 100
+        quant_config = None
+        prefix = ""
+        vit = self.init_attention(
+            embed_dim=embed_dim,
+            num_heads=num_heads,
+            projection_size=projection_size,
+            quant_config=quant_config,
+            prefix=prefix,
+            mocker=mocker,
+        )
+        assert vit.embed_dim == 1000
+        assert vit.hidden_size_per_attention_head == 10
+
+    def test_vit_init_should_raise_error(self, mocker: MockerFixture):
+        embed_dim = 1000
+        num_heads = 7
+        projection_size = 100
+        quant_config = None
+        prefix = ""
+        with pytest.raises(AssertionError):
+            # projection_size must be divisible by num_heads
+            self.init_attention(
+                mocker=mocker,
+                embed_dim=embed_dim,
+                num_heads=num_heads,
+                projection_size=projection_size,
+                quant_config=quant_config,
+                prefix=prefix,
+            )
+
+    def test_vit_forward(self, mocker: MockerFixture):
+        mocker.patch("torch.nn.Module.__setattr__")
+        mocker.patch("torch.nn.Module.__getattr__")
+        mocker.patch("torch.nn.Module.__delattr__")
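+        # The nn.Module attribute hooks are stubbed out above because the
+        # parent __init__ is mocked away, so the module is never properly
+        # initialized; the mocks below are injected through __dict__ instead.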
+        attention = self.init_attention(mocker=mocker)
+        x = torch.rand((100, 3, 10 * 3 * 128))  # (s, b, num_heads * 3 * head_dim)
+        cu_seqlens = torch.tensor([10, 50, 100])
+        cos = torch.rand((1, 100, 1, 128))
+        sin = torch.rand((1, 100, 1, 128))
+
+        qkv = lambda x: (x, 0)  # noqa
+        split_qkv = lambda x: [  # noqa
+            torch.rand((100, 3, 10, 128)) for i in range(3)
+        ]  # noqa
+        npu_rotary_mul = lambda q, cos, sin: q  # noqa
+        _npu_flash_attention_unpad = lambda **kwargs: kwargs["out"]  # noqa
+        proj = lambda x: (x, 0)  # noqa
+
+        mocker_qkv = mocker.patch.object(attention, "qkv", side_effect=qkv)
+        mocker_split_qkv = mocker.patch.object(
+            attention,
+            "split_qkv",
+            side_effect=split_qkv,
+        )
+        mocker_npu_rotary_mul = mocker.patch("torch_npu.npu_rotary_mul",
+                                             side_effect=npu_rotary_mul)
+        mocker_npu_flash_attention_unpad = mocker.patch(
+            "torch_npu._npu_flash_attention_unpad",
+            side_effect=_npu_flash_attention_unpad,
+        )
+        mocker_proj = mocker.patch.object(attention, "proj", side_effect=proj)
+        attention.__dict__["qkv"] = mocker_qkv
+        attention.__dict__["split_qkv"] = mocker_split_qkv
+        attention.__dict__["npu_rotary_mul"] = mocker_npu_rotary_mul
+        attention.__dict__["_npu_flash_attention_unpad"] = (
+            mocker_npu_flash_attention_unpad)
+        attention.__dict__["proj"] = mocker_proj
+
+        output = attention.forward(
+            x=x,
+            cu_seqlens=cu_seqlens,
+            cos=cos,
+            sin=sin,
+        )
+        qkv_args, qkv_kwargs = mocker_qkv.call_args
+        assert qkv_args == (x, )
+        assert not qkv_kwargs
+
+        split_qkv_args, split_qkv_kwargs = mocker_split_qkv.call_args
+        assert split_qkv_args == (x, )
+        assert not split_qkv_kwargs
+
+        npu_rotary_mul_args, npu_rotary_mul_kwargs = mocker_npu_rotary_mul.call_args
+        assert npu_rotary_mul_args[1:] == (cos, sin)
+        assert npu_rotary_mul_args[0].shape == torch.Size([3, 100, 10, 128])
+        assert not npu_rotary_mul_kwargs
+
+        assert output.shape == torch.Size([100, 3, 1280])
+
+
+class TestAscendQwen2_5_VisionBlock_Without_Padding(PytestBase):
+
+    def init_vision_block(
+        self,
+        mocker,
+        dim=100,
+        num_heads=10,
+        mlp_hidden_dim=100,
+    ):
+        mocker_vit = mocker.patch(
+            "vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionBlock.__init__",
+            return_value=None,
+        )
+
+        mocker_attn = mocker.patch(
+            "vllm_ascend.models.qwen2_5_vl_without_padding.AscendQwen2_5_VisionAttention_Without_Padding.__init__",
+            return_value=None,
+        )
+
+        mocker.patch("torch.nn.Module.__setattr__")
+        mocker.patch("torch.nn.Module.__getattr__")
+        mocker.patch("torch.nn.Module.__delattr__")
+        vision_block = AscendQwen2_5_VisionBlock_Without_Padding(
+            dim=dim,
+            num_heads=num_heads,
+            mlp_hidden_dim=mlp_hidden_dim,
+        )
+        args, kwargs = mocker_vit.call_args
+        assert args == (dim, num_heads, mlp_hidden_dim, F.silu, None, None, "")
+        assert not kwargs
+
+        args1, kwargs1 = mocker_attn.call_args
+        assert not args1
+        assert kwargs1 == {
+            "embed_dim": dim,
+            "num_heads": num_heads,
+            "projection_size": dim,
+            "quant_config": None,
+            "prefix": ".attn",
+        }
+        return vision_block
+
+    def test_init_vision_block_should_normal(
+        self,
+        mocker: MockerFixture,
+    ):
+        vision_block = self.init_vision_block(mocker)
+        assert isinstance(vision_block,
+                          AscendQwen2_5_VisionBlock_Without_Padding)
+
+    def test_vision_block_forward(self, mocker: MockerFixture):
+        x = torch.randint(1, 100, (100, 3, 1280))  # (s, b, d)
+        cu_seqlens = torch.tensor([10, 50, 100])
+        cos = torch.rand((1, 100, 1, 128))
+        sin = torch.rand((1, 100, 1, 128))
+        vision_block = self.init_vision_block(mocker)
+        mocker_attn = mocker.patch.object(vision_block, "attn", return_value=x)
+        mocker_mlp = mocker.patch.object(vision_block, "mlp", return_value=x)
+        vision_block.__dict__["attn"] = mocker_attn
+        vision_block.__dict__["mlp"] = mocker_mlp
+
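+        # attn and mlp are mocked to return the original x, so the two
+        # residual additions in forward() are expected to produce 3 * x.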
+        output = vision_block.forward(x.clone(), cu_seqlens, cos, sin)
+
+        _, attn_kwargs = mocker_attn.call_args
+        assert attn_kwargs == {
+            "cu_seqlens": cu_seqlens,
+            "cos": cos,
+            "sin": sin,
+        }
+
+        assert torch.all(x * 3 == output)
+
+
+class TestAscendQwen2_5_VisionPatchEmbed_Without_Padding(PytestBase):
+
+    def test_forward(self):
+        patch_embed = AscendQwen2_5_VisionPatchEmbed_Without_Padding()
+
+        ret = patch_embed(torch.rand((120, 1176)))
+        assert ret.shape == (120, 1152)
+
+
+class TestAscendQwen2_5_VisionTransformer_Without_Padding(PytestBase):
+
+    input_data = torch.tensor([[0.1, 0.2], [0.3, 0.4]])
+
+    def init_vision_transformer(
+        self,
+        mocker,
+    ):
+        norm_eps = 1e-6
+        vision_config = mocker.MagicMock()
+        vision_config.patch_size = 16
+        vision_config.temporal_patch_size = 2
+        vision_config.in_channels = 3
+        vision_config.hidden_act = "gelu"
+        vision_config.depth = 0
+
+        mocker.patch("torch.nn.Module.__setattr__")
+        mocker.patch("torch.nn.Module.__getattr__")
+        mocker.patch("torch.nn.Module.__delattr__")
+        mocker_vit = mocker.patch(
+            "vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionTransformer.__init__",
+            return_value=None,
+        )
+        mocker.patch(
+            "vllm_ascend.models.qwen2_5_vl_without_padding.AscendQwen2_5_VisionBlock_Without_Padding.__init__",
+            return_value=None,
+        )
+        mocker.patch(
+            "vllm_ascend.models.qwen2_5_vl_without_padding.AscendQwen2_5_VisionPatchEmbed_Without_Padding.__init__",
+            return_value=None,
+        )
+        mocker.patch(
+            "vllm_ascend.models.qwen2_5_vl_without_padding.parallel_state.get_tensor_model_parallel_world_size",
+            return_value=1,
+        )
+        mocker.patch(
+            "vllm_ascend.models.qwen2_5_vl_without_padding.parallel_state.get_tensor_model_parallel_rank",
+            return_value=0,
+        )
+        mocker.patch("vllm.distributed.utils.divide", return_value=100)
+
+        vision_transformer = AscendQwen2_5_VisionTransformer_Without_Padding(
+            vision_config,
+            norm_eps,
+        )
+        args, kwargs = mocker_vit.call_args
+        assert args == (vision_config, norm_eps, None, "")
+        assert not kwargs
+
+        return vision_transformer
+
+    def test_init_vision_transformer(self, mocker: MockerFixture):
+        vision_transformer = self.init_vision_transformer(mocker)
+        assert isinstance(vision_transformer,
+                          AscendQwen2_5_VisionTransformer_Without_Padding)
+
+    @pytest.mark.parametrize(
+        "interleaved, expected",
+        [
+            (
+                False,
+                torch.tensor([
+                    input_data[0, 0].cos(),
+                    input_data[0, 1].cos(),
+                    input_data[0, 0].cos(),
+                    input_data[0, 1].cos(),
+                    input_data[1, 0].cos(),
+                    input_data[1, 1].cos(),
+                    input_data[1, 0].cos(),
+                    input_data[1, 1].cos(),
+                ]),
+            ),
+            (
+                True,
+                torch.tensor([
+                    input_data[0, 0].cos(),
+                    input_data[0, 0].cos(),
+                    input_data[0, 1].cos(),
+                    input_data[0, 1].cos(),
+                    input_data[1, 0].cos(),
+                    input_data[1, 0].cos(),
+                    input_data[1, 1].cos(),
+                    input_data[1, 1].cos(),
+                ]),
+            ),
+        ],
+    )
+    def test_cal_cos_sin(self, interleaved, expected, mocker: MockerFixture):
+        vision_transformer = self.init_vision_transformer(mocker)
+        vision_transformer.__dict__["interleaved"] = interleaved
+        vision_transformer.__dict__["hidden_size_per_attention_head"] = 2
+        # effectively a no-op: nn.Module.__setattr__ is still patched out, so
+        # cal_cos_sin reads the __dict__ entry (2) set above
+        vision_transformer.hidden_size_per_attention_head = 4
+        cos_new, _ = vision_transformer.cal_cos_sin(self.input_data)
+        assert cos_new.shape == (1, 4, 1, 2)
+        assert torch.allclose(cos_new.view(-1), expected)
+
+    def test_forward(self, mocker: MockerFixture):
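+        # grid_thw == [[1, 4, 4]] gives 16 patches; the mocked window index
+        # has 8 entries, so the reordered merger output is expected to have
+        # shape (8, 256).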
+        vision_transformer = self.init_vision_transformer(mocker)
+        x = torch.randn(1, 3, 224, 224)
+        grid_thw = torch.tensor([[1, 4, 4]])
+        mocker_patch_embed = mocker.patch.object(
+            vision_transformer,
+            "patch_embed",
+            side_effect=lambda _: torch.randn(16, 512),  # noqa
+        )
+        mocker_rot_pos_emb = mocker.patch.object(
+            vision_transformer,
+            "rot_pos_emb",
+            side_effect=lambda _: torch.randn(16, 64),  # noqa
+        )
+        mocker_get_window_index = mocker.patch.object(
+            vision_transformer,
+            "get_window_index",
+            side_effect=lambda _: (torch.arange(8), [4, 8, 12, 16]),  # noqa
+        )
+        mocker_cal_cos_sin = mocker.patch.object(
+            vision_transformer,
+            "cal_cos_sin",
+            side_effect=lambda _:
+            (torch.randn(16, 32), torch.randn(16, 32)),  # noqa
+        )
+        mocker_merger = mocker.patch.object(
+            vision_transformer,
+            "merger",
+            side_effect=lambda _: torch.randn(16, 256),  # noqa
+        )
+        vision_transformer.__dict__["vision_blocks"] = [
+            lambda *args, **kwargs: torch.randn(16, 1, 512)  # noqa
+        ]
+        vision_transformer.__dict__["patch_embed"] = mocker_patch_embed
+        vision_transformer.__dict__["rot_pos_emb"] = mocker_rot_pos_emb
+        vision_transformer.__dict__[
+            "get_window_index"] = mocker_get_window_index
+        vision_transformer.__dict__["cal_cos_sin"] = mocker_cal_cos_sin
+        vision_transformer.__dict__["merger"] = mocker_merger
+        vision_transformer.__dict__["fullatt_block_indexes"] = [0, 2]
+        vision_transformer.__dict__["spatial_merge_unit"] = 2
+        ret = vision_transformer.forward(x, grid_thw)
+        assert ret.shape == (8, 256)
+        mocker_patch_embed.assert_called_with(x)
+        mocker_rot_pos_emb.assert_called_with(grid_thw)
+        mocker_get_window_index.assert_called_with(grid_thw)
+        mocker_cal_cos_sin.assert_called_once()
+        mocker_merger.assert_called_once()
+
+
+class TestAscendQwen2_5_VLForConditionalGeneration_Without_Padding(PytestBase):
+
+    def test_init_vl_for_conditional_generation(self, mocker: MockerFixture):
+        vllm_config = mocker.MagicMock()
+        vllm_config.vision_config = "vision_config"
+        vllm_config.rms_norm_eps = 1e-5
+        mocker.patch("torch.nn.Module.__setattr__")
+        mocker.patch("torch.nn.Module.__getattr__")
+        mocker.patch("torch.nn.Module.__delattr__")
+        mocker_vl = mocker.patch(
+            "vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLForConditionalGeneration.__init__",
+            return_value=None,
+        )
+        mocker_vit = mocker.patch(
+            "vllm_ascend.models.qwen2_5_vl_without_padding.AscendQwen2_5_VisionTransformer_Without_Padding.__init__",
+            return_value=None,
+        )
+
+        vl_for_conditional_generation = AscendQwen2_5_VLForConditionalGeneration_Without_Padding(
+            vllm_config=vllm_config)
+        args, kwargs = mocker_vl.call_args
+        assert not args
+        assert kwargs == {"vllm_config": vllm_config, "prefix": ""}
+        mocker_vit.assert_called_once()
+        assert isinstance(
+            vl_for_conditional_generation,
+            AscendQwen2_5_VLForConditionalGeneration_Without_Padding,
+        )
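
A minimal sketch of how a PytestBase subclass combines the pytest-only
features its docstring names (the mocker fixture and parametrize); the test
class below is hypothetical and not part of this change:

    import pytest
    from pytest_mock import MockerFixture

    from tests.ut.base import PytestBase


    class TestExample(PytestBase):

        @pytest.mark.parametrize("num_heads", [8, 16])
        def test_num_heads(self, num_heads: int, mocker: MockerFixture):
            # mocker is a pytest fixture and parametrize is a pytest mark;
            # neither works on unittest.TestCase methods, which is why
            # PytestBase does not inherit from unittest.TestCase.
            stub = mocker.MagicMock(return_value=num_heads)
            assert stub() == num_heads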