v0.10.1rc1
tests/ut/patch/worker/patch_common/test_patch_distributed.py (new file, 112 lines)
@@ -0,0 +1,112 @@
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#

from unittest.mock import MagicMock, patch

import torch
from vllm.distributed.parallel_state import GroupCoordinator

from tests.ut.base import TestBase
from vllm_ascend.patch.worker.patch_common.patch_distributed import \
    GroupCoordinatorPatch


class TestPatchDistributed(TestBase):

    def setUp(self):
        self.mock_group_ranks = [[0, 1]]
        self.mock_local_rank = 0
        self.mock_backend = "hccl"
        self.mock_use_device_comm = True

        patcher_get_rank = patch("torch.distributed.get_rank", return_value=0)
        patcher_new_group = patch("torch.distributed.new_group",
                                  return_value=MagicMock())
        patcher_is_cuda_alike = patch(
            "vllm.platforms.current_platform.is_cuda_alike", return_value=True)
        patcher_device_comm_cls = patch(
            "vllm.distributed.parallel_state.resolve_obj_by_qualname",
            return_value=MagicMock())

        self.mock_get_rank = patcher_get_rank.start()
        self.mock_new_group = patcher_new_group.start()
        self.mock_is_cuda_alike = patcher_is_cuda_alike.start()
        self.mock_resolve_obj = patcher_device_comm_cls.start()

        self.addCleanup(patcher_get_rank.stop)
        self.addCleanup(patcher_new_group.stop)
        self.addCleanup(patcher_is_cuda_alike.stop)
        self.addCleanup(patcher_device_comm_cls.stop)

        self.group_coordinator = GroupCoordinatorPatch(
            group_ranks=self.mock_group_ranks,
            local_rank=self.mock_local_rank,
            torch_distributed_backend=self.mock_backend,
            use_device_communicator=self.mock_use_device_comm)

    def test_GroupCoordinator_patched(self):
        self.assertIs(GroupCoordinator, GroupCoordinatorPatch)

    def test_all_to_all_returns_input_when_world_size_1(self):
        self.group_coordinator.world_size = 1
        input_tensor = torch.randn(2, 3)
        output = self.group_coordinator.all_to_all(input_tensor)
        self.assertTrue(torch.equal(output, input_tensor))

    def test_all_to_all_raises_assertion_on_invalid_scatter_dim(self):
        input_tensor = torch.randn(2, 3)
        with self.assertRaises(AssertionError) as cm:
            self.group_coordinator.all_to_all(input_tensor, scatter_dim=2)
        self.assertIn("Invalid scatter dim", str(cm.exception))

    def test_all_to_all_raises_assertion_on_invalid_gather_dim(self):
        input_tensor = torch.randn(2, 3)
        with self.assertRaises(AssertionError) as cm:
            self.group_coordinator.all_to_all(input_tensor, gather_dim=2)
        self.assertIn("Invalid gather dim", str(cm.exception))

    def test_all_to_all_calls_device_communicator_with_correct_args(self):
        mock_communicator = MagicMock()
        self.group_coordinator.device_communicator = mock_communicator

        input_tensor = torch.randn(2, 3)
        scatter_dim = 0
        gather_dim = 1
        scatter_sizes = [1, 1]
        gather_sizes = [1, 1]

        self.group_coordinator.all_to_all(input_tensor,
                                          scatter_dim=scatter_dim,
                                          gather_dim=gather_dim,
                                          scatter_sizes=scatter_sizes,
                                          gather_sizes=gather_sizes)

        mock_communicator.all_to_all.assert_called_once_with(
            input_tensor, scatter_dim, gather_dim, scatter_sizes, gather_sizes)

    def test_all_to_all_calls_device_communicator_without_sizes(self):
        mock_communicator = MagicMock()
        self.group_coordinator.device_communicator = mock_communicator

        input_tensor = torch.randn(2, 3)
        scatter_dim = 0
        gather_dim = 1

        self.group_coordinator.all_to_all(input_tensor,
                                          scatter_dim=scatter_dim,
                                          gather_dim=gather_dim)

        mock_communicator.all_to_all.assert_called_once_with(
            input_tensor, scatter_dim, gather_dim, None, None)
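Reviewer note: the cases above pin down the all_to_all contract of GroupCoordinatorPatch: single-rank groups are a no-op, out-of-range dims trip an assertion, and everything else is delegated to the device communicator. The snippet below is a minimal, self-contained sketch of that contract inferred from the assertions only; the class name and the message text beyond "Invalid scatter dim" / "Invalid gather dim" are illustrative, not the vllm-ascend implementation.

import torch


class AllToAllContractSketch:
    """Sketch of the behaviour the tests assert; not the real patch."""

    def __init__(self, world_size, device_communicator):
        self.world_size = world_size
        self.device_communicator = device_communicator

    def all_to_all(self, input_, scatter_dim=0, gather_dim=-1,
                   scatter_sizes=None, gather_sizes=None):
        # test_all_to_all_returns_input_when_world_size_1
        if self.world_size == 1:
            return input_
        # test_all_to_all_raises_assertion_on_invalid_{scatter,gather}_dim
        assert -input_.dim() <= scatter_dim < input_.dim(), (
            f"Invalid scatter dim {scatter_dim} for a rank-{input_.dim()} tensor")
        assert -input_.dim() <= gather_dim < input_.dim(), (
            f"Invalid gather dim {gather_dim} for a rank-{input_.dim()} tensor")
        # test_all_to_all_calls_device_communicator_*
        return self.device_communicator.all_to_all(
            input_, scatter_dim, gather_dim, scatter_sizes, gather_sizes)


if __name__ == "__main__":
    from unittest.mock import MagicMock

    # Single-rank group: the input tensor is returned unchanged.
    group = AllToAllContractSketch(world_size=1, device_communicator=MagicMock())
    x = torch.randn(2, 3)
    assert torch.equal(group.all_to_all(x), x)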
tests/ut/patch/worker/patch_common/test_patch_linear.py (new file, 167 lines)
@@ -0,0 +1,167 @@
from importlib import reload

import pytest
import torch
import vllm
from pytest_mock import MockerFixture

import vllm_ascend.envs as envs_ascend
from tests.ut.base import PytestBase
from vllm_ascend.patch.worker.patch_common import patch_linear


class TestAscendRowParallelLinear(PytestBase):

    def init_row_parallel_linear(self, mocker: MockerFixture):
        mocker.patch(
            "vllm_ascend.patch.worker.patch_common.patch_linear.AscendRowParallelLinear.__init__",
            return_value=None,
        )
        mocker.patch("torch.nn.Module.__setattr__")
        mocker.patch("torch.nn.Module.__getattr__")
        mocker.patch("torch.nn.Module.__delattr__")
        return patch_linear.AscendRowParallelLinear(
            input_size=128,
            output_size=256,
        )

    @pytest.mark.parametrize(
        "version, expected",
        [
            ("1.0.0", 1),
            ("2.1.0", 1),
        ],
    )
    def test_get_hcomm_info(self, version, expected, mocker: MockerFixture):
        mock_group = mocker.MagicMock()
        backend = mocker.MagicMock()
        backend.get_hccl_comm_name = lambda x: x
        mock_group._get_backend = lambda x: backend
        mock_group.get_hccl_comm_name = lambda x: x
        mocker.patch("torch.distributed.get_rank", return_value=1)
        mocker.patch(
            "torch.distributed.get_global_rank",
            return_value=0,
        )
        mocker.patch("torch.__version__", new=version)
        hcomm_info = patch_linear.AscendRowParallelLinear.get_hcomm_info(
            mock_group)
        assert hcomm_info == expected

    @pytest.mark.parametrize(
        "skip_bias_add, return_bias, bias, expected",
        [
            (True, False, torch.tensor(1.0), torch.tensor(14.0)),
            (False, True, torch.tensor(1.0), (torch.tensor(14.0), None)),
            (
                True,
                True,
                torch.tensor(1.0),
                (torch.tensor(14.0), torch.tensor(1.0)),
            ),
        ],
    )
    def test_forward(
        self,
        skip_bias_add,
        return_bias,
        bias,
        expected,
        mocker: MockerFixture,
    ):
        mocker_tp_group = mocker.MagicMock()
        mocker_tp_group.device_group = mocker.MagicMock()
        row_parallel_linear = self.init_row_parallel_linear(mocker)
        row_parallel_linear.__dict__["tp_rank"] = 0
        row_parallel_linear.__dict__["skip_bias_add"] = skip_bias_add
        row_parallel_linear.__dict__["return_bias"] = return_bias
        row_parallel_linear.__dict__["bias"] = bias
        row_parallel_linear.__dict__["quant_method"] = mocker.MagicMock()
        row_parallel_linear.__dict__["calc_input"] = lambda x: x  # noqa
        row_parallel_linear.__dict__[
            "calc_output"] = lambda x: x.matmul(  # noqa
                torch.tensor([1.0, 2.0]))
        ret = row_parallel_linear.forward(torch.tensor([10.0, 2.0]))
        if isinstance(ret, tuple):
            assert torch.allclose(ret[0], expected[0])
            if ret[1] is None:
                assert ret[1] == expected[1]
            else:
                assert torch.allclose(ret[1], expected[1])
        else:
            assert torch.allclose(ret, expected)

    @pytest.mark.parametrize(
        "input_is_parallel, expected",
        [
            (True, torch.tensor([10.0, 2.0])),
            (False, torch.tensor([10.0])),
        ],
    )
    def test_calc_input(
        self,
        input_is_parallel,
        expected,
        mocker: MockerFixture,
    ):
        row_parallel_linear = self.init_row_parallel_linear(mocker)
        row_parallel_linear.__dict__["input_is_parallel"] = input_is_parallel
        input_tensor = torch.Tensor([10, 2])
        mocker.patch(
            "vllm_ascend.patch.worker.patch_common.patch_linear.get_tensor_model_parallel_rank",  # noqa
            return_value=0,
        )
        mocker.patch(
            "vllm_ascend.patch.worker.patch_common.patch_linear.split_tensor_along_last_dim",  # noqa
            return_value=[torch.Tensor([10]),
                          torch.Tensor([2])],
        )
        input_parallel = row_parallel_linear.calc_input(input_tensor)
        assert torch.allclose(input_parallel, expected)

    @pytest.mark.parametrize(
        "reduce_results, tp_size, expected",
        [
            (True, 2, torch.tensor(56.0)),
            (True, 1, torch.tensor(14.0)),
            (False, 2, torch.tensor(14.0)),
        ],
    )
    def test_calc_output(
        self,
        reduce_results,
        tp_size,
        expected,
        mocker: MockerFixture,
    ):
        quant_method = mocker.MagicMock()
        quant_method.apply = lambda self, x, bias=None: x.matmul(  # noqa
            torch.tensor([1.0, 2.0]))
        row_parallel_linear = self.init_row_parallel_linear(mocker)
        row_parallel_linear.__dict__["reduce_results"] = reduce_results
        row_parallel_linear.__dict__["tp_size"] = tp_size
        row_parallel_linear.__dict__["quant_method"] = quant_method
        row_parallel_linear.__dict__["tp_rank"] = 0
        row_parallel_linear.__dict__["get_hcomm_info"] = lambda x: None  # noqa

        mocker.patch(
            "vllm_ascend.patch.worker.patch_common.patch_linear.get_tp_group",
            return_value=mocker.MagicMock(device_group=mocker.MagicMock()),
        )
        mocker.patch(
            "torch_npu.npu_mm_all_reduce_base",
            side_effect=lambda input_, weight, hccl_info, bias: input_.
            matmul(  # noqa
                torch.tensor([4.0, 8.0])),
        )  # noqa
        ret = row_parallel_linear.calc_output(torch.tensor([10.0, 2.0]))
        assert torch.allclose(ret, expected)

    def test_enable_allreduce_matmul(self, mocker: MockerFixture):
        mocker.patch.object(envs_ascend,
                            "VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE",
                            new=True)
        reload(patch_linear)
        assert envs_ascend.VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE
        assert id(vllm.model_executor.layers.linear.RowParallelLinear) == id(
            patch_linear.AscendRowParallelLinear)
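Reviewer note: taken together, these cases fix the decomposition the patch relies on: forward is calc_output(calc_input(x)) plus the skip_bias_add / return_bias contract, calc_input keeps the local shard only when the input is not already parallel, calc_output takes the fused torch_npu.npu_mm_all_reduce_base path only when reduce_results is set and tp_size > 1, and importing patch_linear with VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE enabled swaps vLLM's RowParallelLinear for the Ascend subclass. The sketch below models only the single-process contract those expectations describe; the class and parameter names are placeholders, not the patched code.

import torch


class RowParallelContractSketch:
    """Single-process model of what test_forward / test_calc_input /
    test_calc_output check; communication and quantization are left out."""

    def __init__(self, weight, bias, skip_bias_add=False, return_bias=True,
                 input_is_parallel=True, tp_size=1, tp_rank=0):
        self.weight = weight
        self.bias = bias
        self.skip_bias_add = skip_bias_add
        self.return_bias = return_bias
        self.input_is_parallel = input_is_parallel
        self.tp_size = tp_size
        self.tp_rank = tp_rank

    def calc_input(self, input_):
        # Pass through when the activation is already sharded, otherwise keep
        # this rank's chunk of the last dimension (cf. test_calc_input).
        if self.input_is_parallel:
            return input_
        return torch.chunk(input_, self.tp_size, dim=-1)[self.tp_rank]

    def calc_output(self, input_parallel):
        # Local matmul stands in for quant_method.apply; in the patch the
        # reduce_results and tp_size > 1 case goes through
        # torch_npu.npu_mm_all_reduce_base instead (cf. test_calc_output),
        # which needs an NPU and an HCCL communicator.
        return input_parallel.matmul(self.weight)

    def forward(self, input_):
        # The unapplied bias is surfaced only when skip_bias_add is set, and
        # only as part of a tuple when return_bias asks for one
        # (cf. test_forward).
        output = self.calc_output(self.calc_input(input_))
        if not self.return_bias:
            return output
        return output, (self.bias if self.skip_bias_add else None)


if __name__ == "__main__":
    layer = RowParallelContractSketch(weight=torch.tensor([1.0, 2.0]),
                                      bias=torch.tensor(1.0),
                                      skip_bias_add=True, return_bias=False)
    print(layer.forward(torch.tensor([10.0, 2.0])))  # tensor(14.)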
tests/ut/patch/worker/patch_common/test_patch_minicpm.py (new file, 77 lines)
@@ -0,0 +1,77 @@
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#

from unittest.mock import MagicMock

import torch

from tests.ut.base import TestBase
from vllm_ascend.patch.worker.patch_common.patch_minicpm import forward


class TestPatchMiniCPM(TestBase):

    def setUp(self):
        self.mock_self = MagicMock()

        self.mock_self.q_size = 128
        self.mock_self.kv_size = 128

        self.mock_self.qkv_proj = MagicMock()
        self.mock_self.rotary_emb = MagicMock()
        self.mock_self.attn = MagicMock()
        self.mock_self.o_proj = MagicMock()

        self.positions = torch.tensor([1, 2, 3])
        self.hidden_states = torch.randn(3, 256)

        self.mock_qkv = torch.randn(3, 384)
        self.mock_q = self.mock_qkv[:, :128]
        self.mock_k = self.mock_qkv[:, 128:256]
        self.mock_v = self.mock_qkv[:, 256:]

        self.mock_self.qkv_proj.return_value = (self.mock_qkv, None)
        self.mock_self.rotary_emb.return_value = (self.mock_q, self.mock_k)
        self.mock_self.attn.return_value = torch.randn(3, 256)
        self.mock_self.o_proj.return_value = (torch.randn(3, 256), None)

    def test_forward_patched(self):
        from vllm.model_executor.models.minicpm import MiniCPMAttention

        self.assertIs(MiniCPMAttention.forward, forward)

    def test_forward_function(self):
        result = forward(self.mock_self, self.positions, self.hidden_states)

        self.mock_self.qkv_proj.assert_called_once_with(self.hidden_states)

        args, _ = self.mock_self.rotary_emb.call_args
        self.assertEqual(len(args), 3)
        self.assertTrue(torch.equal(args[0], self.positions))
        self.assertTrue(torch.equal(args[1], self.mock_q))
        self.assertTrue(torch.equal(args[2], self.mock_k))

        args, _ = self.mock_self.attn.call_args
        self.assertEqual(len(args), 3)
        self.assertTrue(torch.equal(args[0], self.mock_q))
        self.assertTrue(torch.equal(args[1], self.mock_k))
        self.assertTrue(torch.equal(args[2], self.mock_v))

        self.mock_self.o_proj.assert_called_once_with(
            self.mock_self.attn.return_value)

        self.assertEqual(result.shape, (3, 256))
        self.assertTrue(
            torch.equal(result, self.mock_self.o_proj.return_value[0]))
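Reviewer note: the call-order assertions above describe the dataflow of the patched attention forward: project to QKV, split by q_size / kv_size, apply the rotary embedding to q and k, run attention, then the output projection. Below is a minimal sketch of that dataflow, driven the same way as the mocked module in setUp; it is illustrative only and not the code in patch_minicpm.

from unittest.mock import MagicMock

import torch


def minicpm_attention_forward_sketch(self, positions, hidden_states):
    """Dataflow checked by test_forward_function; a sketch, not the patch."""
    qkv, _ = self.qkv_proj(hidden_states)
    q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
    q, k = self.rotary_emb(positions, q, k)
    attn_output = self.attn(q, k, v)
    output, _ = self.o_proj(attn_output)
    return output


if __name__ == "__main__":
    # Drive the sketch with a mock wired up like TestPatchMiniCPM.setUp.
    module = MagicMock(q_size=128, kv_size=128)
    qkv = torch.randn(3, 384)
    module.qkv_proj.return_value = (qkv, None)
    module.rotary_emb.side_effect = lambda pos, q, k: (q, k)
    module.attn.return_value = torch.randn(3, 256)
    module.o_proj.return_value = (torch.randn(3, 256), None)
    out = minicpm_attention_forward_sketch(module, torch.tensor([1, 2, 3]),
                                           torch.randn(3, 256))
    assert out.shape == (3, 256)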