diff --git a/tests/ut/models/test_qwen2_vl.py b/tests/ut/models/test_qwen2_vl.py
new file mode 100644
index 0000000..d62b859
--- /dev/null
+++ b/tests/ut/models/test_qwen2_vl.py
@@ -0,0 +1,200 @@
+import pytest
+import torch
+from pytest_mock import MockerFixture
+from vllm.model_executor.layers.activation import QuickGELU
+
+from tests.ut.base import PytestBase
+from vllm_ascend.models.qwen2_vl import (AscendQwen2VisionAttention,
+                                         AscendQwen2VisionBlock)
+
+
+class TestAscendQwen2VisionAttention(PytestBase):
+
+    def init_attention(
+        self,
+        mocker,
+        embed_dim=1000,
+        num_heads=10,
+        projection_size=100,
+        quant_config=None,
+        prefix="",
+    ):
+        mocker_attn = mocker.patch(
+            "vllm_ascend.models.qwen2_vl.Qwen2VisionAttention.__init__")
+
+        attention = AscendQwen2VisionAttention(
+            embed_dim=embed_dim,
+            num_heads=num_heads,
+            projection_size=projection_size,
+            quant_config=quant_config,
+            prefix=prefix,
+        )
+        args, kwargs = mocker_attn.call_args
+        assert args == (embed_dim, num_heads, projection_size, None, "")
+        assert not kwargs
+        attention.num_attention_heads_per_partition = num_heads
+        return attention
+
+    def test_attn_init_should_normal(self, mocker: MockerFixture):
+        embed_dim = 1000
+        num_heads = 10
+        projection_size = 100
+        quant_config = None
+        prefix = ""
+        vit = self.init_attention(
+            embed_dim=embed_dim,
+            num_heads=num_heads,
+            projection_size=projection_size,
+            quant_config=quant_config,
+            prefix=prefix,
+            mocker=mocker,
+        )
+        assert vit.hidden_size_per_attention_head == 10
+
+    def test_attn_init_should_raise_error(self, mocker: MockerFixture):
+        embed_dim = 1000
+        num_heads = 7
+        projection_size = 100
+        quant_config = None
+        prefix = ""
+        with pytest.raises(AssertionError):
+            # projection_size must be divisible by num_heads
+            self.init_attention(
+                mocker=mocker,
+                embed_dim=embed_dim,
+                num_heads=num_heads,
+                projection_size=projection_size,
+                quant_config=quant_config,
+                prefix=prefix,
+            )
+
+    def test_attn_forward(self, mocker: MockerFixture):
+        attention = self.init_attention(mocker=mocker)
+        mocker.patch("torch.nn.Module.__setattr__")
+        mocker.patch("torch.nn.Module.__getattr__")
+        mocker.patch("torch.nn.Module.__delattr__")
+        x = torch.rand((100, 3, 10 * 3 * 128))  # s, b, num_heads * 3 * head_dim
+        cu_seqlens = torch.tensor([10, 50, 100])
+        cos = torch.rand((1, 100, 1, 128))
+        sin = torch.rand((1, 100, 1, 128))
+
+        qkv = lambda x: (x, 0)  # noqa
+        split_qkv = lambda x: [  # noqa
+            torch.rand((100, 3, 10, 128)) for i in range(3)
+        ]  # noqa
+        npu_rotary_mul = lambda q, cos, sin: q  # noqa
+        _npu_flash_attention_unpad = lambda **kwargs: kwargs["out"]  # noqa
+        proj = lambda x: (x, 0)  # noqa
+
+        mocker_qkv = mocker.patch.object(attention, "qkv", side_effect=qkv)
+        mocker_split_qkv = mocker.patch.object(
+            attention,
+            "split_qkv",
+            side_effect=split_qkv,
+        )
+        mocker_npu_rotary_mul = mocker.patch("torch_npu.npu_rotary_mul",
+                                             side_effect=npu_rotary_mul)
+        mocker_npu_flash_attention_unpad = mocker.patch(
+            "torch_npu._npu_flash_attention_unpad",
+            side_effect=_npu_flash_attention_unpad,
+        )
+        mocker_proj = mocker.patch.object(attention, "proj", side_effect=proj)
+        attention.__dict__["qkv"] = mocker_qkv
+        attention.__dict__["split_qkv"] = mocker_split_qkv
+        attention.__dict__["npu_rotary_mul"] = mocker_npu_rotary_mul
+        attention.__dict__["_npu_flash_attention_unpad"] = (
+            mocker_npu_flash_attention_unpad)
+        attention.__dict__["proj"] = mocker_proj
+
+        output = attention.forward(
+            x=x,
+            cu_seqlens=cu_seqlens,
+            cos=cos,
+            sin=sin,
+        )
+        qkv_args, qkv_kwargs = mocker_qkv.call_args
+        assert qkv_args == (x, )
+        assert not qkv_kwargs
+
+        split_qkv_args, split_qkv_kwargs = mocker_split_qkv.call_args
+        assert split_qkv_args == (x, )
+        assert not split_qkv_kwargs
+
+        npu_rotary_mul_args, npu_rotary_mul_kwargs = mocker_npu_rotary_mul.call_args
+        assert npu_rotary_mul_args[1:] == (cos, sin)
+        assert npu_rotary_mul_args[0].shape == torch.Size([3, 100, 10, 128])
+        assert not npu_rotary_mul_kwargs
+
+        assert output.shape == torch.Size([100, 3, 1280])
+
+
+class TestAscendQwen2VisionBlock(PytestBase):
+
+    def init_vision_block(
+        self,
+        mocker,
+        dim=100,
+        num_heads=10,
+        mlp_ratio=0.5,
+    ):
+        mocker_vit = mocker.patch(
+            "vllm.model_executor.models.qwen2_vl.Qwen2VisionBlock.__init__",
+            return_value=None,
+        )
+
+        mocker_attn = mocker.patch(
+            "vllm_ascend.models.qwen2_vl.AscendQwen2VisionAttention.__init__",
+            return_value=None,
+        )
+
+        mocker.patch("torch.nn.Module.__setattr__")
+        mocker.patch("torch.nn.Module.__getattr__")
+        mocker.patch("torch.nn.Module.__delattr__")
+        vision_block = AscendQwen2VisionBlock(
+            dim=dim,
+            num_heads=num_heads,
+            mlp_ratio=mlp_ratio,
+        )
+        args, kwargs = mocker_vit.call_args
+        assert args == (dim, num_heads, mlp_ratio, QuickGELU, None, None, "")
+        assert not kwargs
+
+        args1, kwargs1 = mocker_attn.call_args
+        assert not args1
+        assert kwargs1 == {
+            "embed_dim": dim,
+            "num_heads": num_heads,
+            "projection_size": dim,
+            "quant_config": None,
+            "prefix": ".attn",
+        }
+        return vision_block
+
+    def test_init_vision_block_should_normal(
+        self,
+        mocker: MockerFixture,
+    ):
+        vision_block = self.init_vision_block(mocker)
+        assert isinstance(vision_block, AscendQwen2VisionBlock)
+
+    def test_vision_block_forward(self, mocker: MockerFixture):
+        x = torch.randint(1, 100, (100, 3, 1280))  # s, b, d
+        cu_seqlens = torch.tensor([10, 50, 100])
+        cos = torch.rand((1, 100, 1, 128))
+        sin = torch.rand((1, 100, 1, 128))
+        vision_block = self.init_vision_block(mocker)
+        mocker_attn = mocker.patch.object(vision_block, "attn", return_value=x)
+        mocker_mlp = mocker.patch.object(vision_block, "mlp", return_value=x)
+        vision_block.__dict__["attn"] = mocker_attn
+        vision_block.__dict__["mlp"] = mocker_mlp
+
+        output = vision_block.forward(x.clone(), cu_seqlens, cos, sin)
+
+        _, attn_kwargs = mocker_attn.call_args
+        assert attn_kwargs == {
+            "cu_seqlens": cu_seqlens,
+            "cos": cos,
+            "sin": sin,
+        }
+
+        assert torch.all(x * 3 == output)