ut:add ut for qwen2_5_vl (#2143)
### What this PR does / why we need it?
Add unit tests (UT) for the Ascend Qwen2.5-VL model classes in `vllm_ascend.models.qwen2_5_vl`.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Not applicable: this patch only adds unit tests.
- vLLM version: v0.10.0
- vLLM main:
2836dd73f1
Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
This commit is contained in:
424
tests/ut/models/test_qwen2_5_vl.py
Normal file
424
tests/ut/models/test_qwen2_5_vl.py
Normal file
@@ -0,0 +1,424 @@
|
||||
import pytest
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from tests.ut.base import PytestBase
|
||||
from vllm_ascend.models.qwen2_5_vl import (
|
||||
AscendQwen2_5_VisionAttention, AscendQwen2_5_VisionBlock,
|
||||
AscendQwen2_5_VisionPatchEmbed, AscendQwen2_5_VisionRotaryEmbedding,
|
||||
AscendQwen2_5_VisionTransformer, AscendQwen2_5_VLForConditionalGeneration)
|
||||
|
||||
|
||||
class TestAscendQwen2_5_VisionAttention(PytestBase):
    """Unit tests for ``AscendQwen2_5_VisionAttention``."""

    def init_attention(
        self,
        mocker,
        embed_dim=1000,
        num_heads=10,
        projection_size=100,
        quant_config=None,
        prefix="",
    ):
        """Build an attention layer with the parent ``__init__`` mocked.

        Args:
            mocker: The ``pytest_mock`` fixture used to install the patch.
            embed_dim: Forwarded to the constructor as ``embed_dim``.
            num_heads: Forwarded to the constructor as ``num_heads``.
            projection_size: Forwarded as ``projection_size``.
            quant_config: Forwarded as ``quant_config``.
            prefix: Forwarded as ``prefix``.

        Returns:
            The constructed ``AscendQwen2_5_VisionAttention`` with
            ``num_attention_heads_per_partition`` set to ``num_heads``.

        Raises:
            AssertionError: If the parent ``__init__`` was not called with
                exactly the five values above as positional arguments.
        """
        mocker_attn = mocker.patch(
            "vllm_ascend.models.qwen2_5_vl.Qwen2_5_VisionAttention.__init__")

        attention = AscendQwen2_5_VisionAttention(
            embed_dim=embed_dim,
            num_heads=num_heads,
            projection_size=projection_size,
            quant_config=quant_config,
            prefix=prefix,
        )
        args, kwargs = mocker_attn.call_args
        # Compare against the values that were actually passed in, so the
        # helper stays correct for non-default quant_config / prefix.
        assert args == (
            embed_dim,
            num_heads,
            projection_size,
            quant_config,
            prefix,
        )
        assert not kwargs
        attention.num_attention_heads_per_partition = num_heads
        return attention

    def test_attn_init_should_normal(self, mocker: MockerFixture):
        """Constructing with divisible sizes exposes the expected fields."""
        vit = self.init_attention(
            mocker=mocker,
            embed_dim=1000,
            num_heads=10,
            projection_size=100,
            quant_config=None,
            prefix="",
        )
        assert vit.embed_dim == 1000
        assert vit.hidden_size_per_attention_head == 10

    def test_attn_init_should_raise_error(self, mocker: MockerFixture):
        """Construction must fail when the sizes do not divide evenly."""
        with pytest.raises(AssertionError):
            # projection_size (100) is not divisible by num_heads (7).
            self.init_attention(
                mocker=mocker,
                embed_dim=1000,
                num_heads=7,
                projection_size=100,
                quant_config=None,
                prefix="",
            )

    def test_split_qkv(self, mocker: MockerFixture):
        """``split_qkv`` yields three tensors of shape (100, 10, 10, 10)."""
        attention = self.init_attention(mocker=mocker)
        for hook in ("__setattr__", "__getattr__", "__delattr__"):
            mocker.patch(f"torch.nn.Module.{hook}")
        q, k, v = attention.split_qkv(torch.rand((100, 10, 300)))
        assert q.shape == (100, 10, 10, 10)
        assert k.shape == (100, 10, 10, 10)
        assert v.shape == (100, 10, 10, 10)

    def test_attn_forward(self, mocker: MockerFixture):
        """``forward`` routes through qkv, split_qkv and rotary in order.

        Every collaborator is replaced by a stand-in, so only the call
        arguments and the final output shape are checked.
        """
        attention = self.init_attention(mocker=mocker)
        for hook in ("__setattr__", "__getattr__", "__delattr__"):
            mocker.patch(f"torch.nn.Module.{hook}")
        x = torch.rand((100, 3, 10 * 3 * 128))  # s,b, head*3*head_dim
        cu_seqlens = torch.tensor([10, 50, 100])
        cos = torch.rand((1, 100, 1, 128))
        sin = torch.rand((1, 100, 1, 128))

        def fake_qkv(hidden):
            return hidden, 0

        def fake_split_qkv(hidden):
            return [torch.rand((100, 3, 10, 128)) for _ in range(3)]

        def fake_npu_rotary_mul(q, _cos, _sin):
            return q

        def fake_flash_attention_unpad(**kwargs):
            return kwargs["out"]

        def fake_proj(hidden):
            return hidden, 0

        mocker_qkv = mocker.patch.object(attention,
                                         "qkv",
                                         side_effect=fake_qkv)
        mocker_split_qkv = mocker.patch.object(
            attention,
            "split_qkv",
            side_effect=fake_split_qkv,
        )
        mocker_npu_rotary_mul = mocker.patch(
            "torch_npu.npu_rotary_mul",
            side_effect=fake_npu_rotary_mul,
        )
        mocker_npu_flash_attention_unpad = mocker.patch(
            "torch_npu._npu_flash_attention_unpad",
            side_effect=fake_flash_attention_unpad,
        )
        mocker_proj = mocker.patch.object(attention,
                                          "proj",
                                          side_effect=fake_proj)
        # The nn.Module attribute hooks are mocked above, so the stand-ins
        # are also written straight into the instance __dict__.
        attention.__dict__["qkv"] = mocker_qkv
        attention.__dict__["split_qkv"] = mocker_split_qkv
        attention.__dict__["npu_rotary_mul"] = mocker_npu_rotary_mul
        attention.__dict__["_npu_flash_attention_unpad"] = (
            mocker_npu_flash_attention_unpad)
        attention.__dict__["proj"] = mocker_proj

        output = attention.forward(
            x=x,
            cu_seqlens=cu_seqlens,
            cos=cos,
            sin=sin,
        )

        qkv_args, qkv_kwargs = mocker_qkv.call_args
        assert qkv_args == (x, )
        assert not qkv_kwargs

        split_qkv_args, split_qkv_kwargs = mocker_split_qkv.call_args
        assert split_qkv_args == (x, )
        assert not split_qkv_kwargs

        rotary_args, rotary_kwargs = mocker_npu_rotary_mul.call_args
        assert rotary_args[1:] == (cos, sin)
        assert rotary_args[0].shape == torch.Size([3, 100, 10, 128])
        assert not rotary_kwargs

        assert output.shape == torch.Size([100, 3, 1280])
|
||||
|
||||
|
||||
class TestAscendQwen2_5_VisionBlock(PytestBase):
    """Unit tests for ``AscendQwen2_5_VisionBlock``."""

    def init_vision_block(
        self,
        mocker,
        dim=100,
        num_heads=10,
        mlp_hidden_dim=100,
    ):
        """Create a vision block and check both constructor calls it makes.

        The parent block ``__init__`` and the Ascend attention ``__init__``
        are mocked, as are the ``torch.nn.Module`` attribute hooks.

        Args:
            mocker: The ``pytest_mock`` fixture used to install the patches.
            dim: Passed to the block as ``dim``.
            num_heads: Passed to the block as ``num_heads``.
            mlp_hidden_dim: Passed to the block as ``mlp_hidden_dim``.

        Returns:
            The constructed ``AscendQwen2_5_VisionBlock``.
        """
        parent_init = mocker.patch(
            "vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionBlock.__init__",
            return_value=None,
        )
        attn_init = mocker.patch(
            "vllm_ascend.models.qwen2_5_vl.AscendQwen2_5_VisionAttention.__init__",
            return_value=None,
        )
        for hook in ("__setattr__", "__getattr__", "__delattr__"):
            mocker.patch(f"torch.nn.Module.{hook}")

        block = AscendQwen2_5_VisionBlock(
            dim=dim,
            num_heads=num_heads,
            mlp_hidden_dim=mlp_hidden_dim,
        )

        # The parent constructor must receive everything positionally.
        parent_positional, parent_keyword = parent_init.call_args
        expected_parent_args = (
            dim,
            num_heads,
            mlp_hidden_dim,
            F.silu,
            None,
            None,
            "",
        )
        assert parent_positional == expected_parent_args
        assert not parent_keyword

        # The attention constructor must receive everything by keyword.
        attn_positional, attn_keyword = attn_init.call_args
        expected_attn_kwargs = {
            "embed_dim": dim,
            "num_heads": num_heads,
            "projection_size": dim,
            "quant_config": None,
            "prefix": ".attn",
        }
        assert not attn_positional
        assert attn_keyword == expected_attn_kwargs
        return block

    def test_init_vision_block_should_normal(
        self,
        mocker: MockerFixture,
    ):
        """The helper returns an ``AscendQwen2_5_VisionBlock`` instance."""
        block = self.init_vision_block(mocker)
        assert isinstance(block, AscendQwen2_5_VisionBlock)

    def test_vision_block_forward(self, mocker: MockerFixture):
        """``forward`` output equals three times the input.

        ``attn`` and ``mlp`` are both mocked to return the input tensor.
        """
        x = torch.randint(1, 100, (100, 3, 1280))  # s,b,d
        cu_seqlens = torch.tensor([10, 50, 100])
        cos = torch.rand((1, 100, 1, 128))
        sin = torch.rand((1, 100, 1, 128))
        block = self.init_vision_block(mocker)

        attn_mock = mocker.patch.object(block, "attn", return_value=x)
        mlp_mock = mocker.patch.object(block, "mlp", return_value=x)
        # The nn.Module attribute hooks are mocked by the helper, so the
        # mocks are also written straight into the instance __dict__.
        block.__dict__["attn"] = attn_mock
        block.__dict__["mlp"] = mlp_mock

        output = block.forward(x.clone(), cu_seqlens, cos, sin)

        _, attn_keyword = attn_mock.call_args
        expected_attn_kwargs = {
            "cu_seqlens": cu_seqlens,
            "cos": cos,
            "sin": sin,
        }
        assert attn_keyword == expected_attn_kwargs

        assert torch.all(x * 3 == output)
|
||||
|
||||
|
||||
class TestAscendQwen2_5_VisionPatchEmbed(PytestBase):
    """Unit tests for ``AscendQwen2_5_VisionPatchEmbed``."""

    def test_forward(self):
        """A (120, 1176) input produces a (120, 1152) output."""
        embedder = AscendQwen2_5_VisionPatchEmbed()
        pixels = torch.rand((120, 1176))

        embedded = embedder(pixels)

        assert embedded.shape == (120, 1152)
|
||||
|
||||
|
||||
class TestAscendQwen2_5_VisionRotaryEmbedding(PytestBase):
    """Unit tests for ``AscendQwen2_5_VisionRotaryEmbedding``."""

    def init_rotary_embedding(
        self,
        mocker,
        dim=128,
    ):
        """Create a rotary embedding with the parent ``__init__`` mocked.

        Args:
            mocker: The ``pytest_mock`` fixture used to install the patches.
            dim: Passed to the constructor as ``dim``.

        Returns:
            The constructed ``AscendQwen2_5_VisionRotaryEmbedding``.
        """
        parent_init = mocker.patch(
            "vllm_ascend.models.qwen2_5_vl.Qwen2_5_VisionRotaryEmbedding.__init__",
            return_value=None,
        )
        for hook in ("__setattr__", "__getattr__", "__delattr__"):
            mocker.patch(f"torch.nn.Module.{hook}")

        embedding = AscendQwen2_5_VisionRotaryEmbedding(dim=dim)

        # The parent must be called positionally with (dim, 10000.0).
        positional, keyword = parent_init.call_args
        assert positional == (dim, 10000.0)
        assert not keyword
        return embedding

    def test_init_rotary_embedding_should_normal(self, mocker: MockerFixture):
        """The helper returns an instance of the Ascend rotary embedding."""
        embedding = self.init_rotary_embedding(mocker)
        assert isinstance(embedding, AscendQwen2_5_VisionRotaryEmbedding)
|
||||
|
||||
|
||||
class TestAscendQwen2_5_VisionTransformer(PytestBase):
    """Unit tests for ``AscendQwen2_5_VisionTransformer``."""

    # Shared 2x2 input for the cos/sin test and its parametrized cases.
    input_data = torch.tensor([[0.1, 0.2], [0.3, 0.4]])

    def init_vision_transformer(
        self,
        mocker,
    ):
        """Build a vision transformer from a mocked vision config.

        The tensor-parallel rank / world-size helpers and the ``divide``
        helpers are patched to fixed return values before construction.

        Args:
            mocker: The ``pytest_mock`` fixture used to install the patches.

        Returns:
            The constructed ``AscendQwen2_5_VisionTransformer``.
        """
        norm_eps = 1e-6
        vision_config = mocker.MagicMock()
        vision_config.configure_mock(
            patch_size=16,
            temporal_patch_size=2,
            in_channels=3,
            hidden_act="gelu",
            depth=0,
            num_heads=10,
            hidden_size=300,
        )

        # (patch target, fixed return value), applied in this order.
        fixed_returns = (
            ("vllm_ascend.models.qwen2_5_vl.parallel_state.get_tensor_model_parallel_rank",
             0),
            ("vllm.distributed.utils.divide", 100),
            ("vllm.model_executor.layers.linear.get_tensor_model_parallel_world_size",
             2),
            ("vllm.model_executor.layers.linear.divide", 2),
            ("vllm.model_executor.layers.linear.get_tensor_model_parallel_rank",
             0),
            ("vllm_ascend.models.qwen2_5_vl.parallel_state.get_tensor_model_parallel_world_size",
             2),
        )
        for target, value in fixed_returns:
            mocker.patch(target, return_value=value)

        transformer = AscendQwen2_5_VisionTransformer(
            vision_config,
            norm_eps,
        )

        assert not transformer.interleaved
        return transformer

    def test_init_vision_transformer(self, mocker: MockerFixture):
        """The helper returns an ``AscendQwen2_5_VisionTransformer``."""
        transformer = self.init_vision_transformer(mocker)
        assert isinstance(transformer, AscendQwen2_5_VisionTransformer)

    # The lists below stay literal on purpose: they are evaluated in the
    # class body, where ``input_data`` is visible to a plain list display.
    @pytest.mark.parametrize(
        "interleaved, expected",
        [
            (
                False,
                torch.tensor([
                    input_data[0, 0].cos(),
                    input_data[0, 1].cos(),
                    input_data[0, 0].cos(),
                    input_data[0, 1].cos(),
                    input_data[1, 0].cos(),
                    input_data[1, 1].cos(),
                    input_data[1, 0].cos(),
                    input_data[1, 1].cos(),
                ]),
            ),
            (
                True,
                torch.tensor([
                    input_data[0, 0].cos(),
                    input_data[0, 0].cos(),
                    input_data[0, 1].cos(),
                    input_data[0, 1].cos(),
                    input_data[1, 0].cos(),
                    input_data[1, 0].cos(),
                    input_data[1, 1].cos(),
                    input_data[1, 1].cos(),
                ]),
            ),
        ],
    )
    def test_cal_cos_sin(self, interleaved, expected, mocker: MockerFixture):
        """``cal_cos_sin`` returns a cos tensor of shape (1, 32, 1, 2).

        NOTE(review): ``expected`` is supplied by the parametrization but is
        never compared with the result; only the shape is checked.
        """
        transformer = self.init_vision_transformer(mocker)
        for hook in ("__setattr__", "__getattr__", "__delattr__"):
            mocker.patch(f"torch.nn.Module.{hook}")
        transformer.__dict__["interleaved"] = interleaved
        transformer.__dict__["hidden_size_per_attention_head"] = 2
        # NOTE(review): __setattr__ is mocked above, so this assignment
        # presumably has no effect on the instance - confirm and remove.
        transformer.hidden_size_per_attention_head = 4

        cos_new, _ = transformer.cal_cos_sin(self.input_data)

        assert cos_new.shape == (1, 32, 1, 2)

    def test_forward(self, mocker: MockerFixture):
        """``forward`` calls each mocked stage and returns an (8, 256) tensor."""
        transformer = self.init_vision_transformer(mocker)
        for hook in ("__setattr__", "__getattr__", "__delattr__"):
            mocker.patch(f"torch.nn.Module.{hook}")
        x = torch.randn(1, 3, 224, 224)
        grid_thw = torch.tensor([[1, 4, 4]])

        # Attribute name -> stand-in used as the mock's side effect.
        stand_ins = {
            "patch_embed":
            lambda _: torch.randn(16, 512),
            "rot_pos_emb":
            lambda _: torch.randn(16, 64),
            "get_window_index":
            lambda _: (torch.arange(8), [4, 8, 12, 16]),
            "cal_cos_sin":
            lambda _: (torch.randn(16, 32), torch.randn(16, 32)),
            "merger":
            lambda _: torch.randn(16, 256),
        }
        mocks = {}
        for attr, effect in stand_ins.items():
            mocks[attr] = mocker.patch.object(
                transformer,
                attr,
                side_effect=effect,
            )

        # The nn.Module attribute hooks are mocked above, so everything the
        # test injects is written straight into the instance __dict__.
        transformer.__dict__["vision_blocks"] = [
            lambda *args, **kwargs: torch.randn(16, 1, 512)
        ]
        transformer.__dict__.update(mocks)
        transformer.__dict__["fullatt_block_indexes"] = [0, 2]
        transformer.__dict__["spatial_merge_unit"] = 2

        ret = transformer.forward(x, grid_thw)

        assert ret.shape == (8, 256)
        mocks["patch_embed"].assert_called_with(x)
        mocks["rot_pos_emb"].assert_called_with(grid_thw)
        mocks["get_window_index"].assert_called_with(grid_thw)
        mocks["cal_cos_sin"].assert_called_once()
        mocks["merger"].assert_called_once()
|
||||
|
||||
|
||||
class TestAscendQwen2_5_VLForConditionalGeneration(PytestBase):
    """Unit tests for ``AscendQwen2_5_VLForConditionalGeneration``."""

    def test_init_vl_for_conditional_generation(self, mocker: MockerFixture):
        """Construction calls the parent init and builds the vision tower.

        The parent ``__init__`` must be called by keyword with
        ``vllm_config`` and an empty ``prefix``; the Ascend vision
        transformer ``__init__`` must be called exactly once.
        """
        config = mocker.MagicMock()
        config.vision_config = "vision_config"
        config.rms_norm_eps = 1e-5
        for hook in ("__setattr__", "__getattr__", "__delattr__"):
            mocker.patch(f"torch.nn.Module.{hook}")
        parent_init = mocker.patch(
            "vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLForConditionalGeneration.__init__",
            return_value=None,
        )
        vit_init = mocker.patch(
            "vllm_ascend.models.qwen2_5_vl.AscendQwen2_5_VisionTransformer.__init__",
            return_value=None,
        )

        model = AscendQwen2_5_VLForConditionalGeneration(vllm_config=config)

        positional, keyword = parent_init.call_args
        assert not positional
        assert keyword == {"vllm_config": config, "prefix": ""}
        vit_init.assert_called_once()
        assert isinstance(model, AscendQwen2_5_VLForConditionalGeneration)
|
||||
Reference in New Issue
Block a user