refactor linear (#2867)
### What this PR does / why we need it?
The current linear.py has the following issues:
- There is redundant conditional logic in the `comm_group` and `forward`
selection for classes such as `AscendMergedColumnParallelLinear`.
- Inconsistent comm_group selection logic exists among
`AscendMergedColumnParallelLinear`, `AscendColumnParallelLinear`, and
`AscendQKVParallelLinear`.
To address these two issues, this PR encapsulates `comm_group` and
`forward` into classes and extracts the class-selection logic into
common functions. For future additions of custom communication groups or
forward methods, it will only be necessary to extend
`CustomColumnParallelOp` or `CustomRowParallelOp` and add the new selection
logic.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
- vLLM version: v0.10.2
- vLLM main:
dd39baf717
---------
Signed-off-by: realliujiaxu <realliujiaxu@163.com>
Co-authored-by: weijinqian0 <weijinqian@huawei.com>
This commit is contained in:
@@ -295,7 +295,7 @@ class TestAscendQwen2_5_VisionTransformer(PytestBase):
         mock_group.rank_in_group = 0
         mock_group.world_size = 2
         mocker.patch(
-            "vllm_ascend.ops.linear.get_tp_group",
+            "vllm_ascend.ops.linear_op.get_tp_group",
            return_value=mock_group,
        )
@@ -7,8 +7,7 @@ import torch

 from vllm_ascend import ascend_config
 from vllm_ascend.distributed import parallel_state
-from vllm_ascend.ops.linear import (AscendColumnParallelLinear,
-                                    AscendMergedColumnParallelLinear,
+from vllm_ascend.ops.linear import (AscendMergedColumnParallelLinear,
                                     AscendRowParallelLinear)
@@ -32,7 +31,7 @@ class BaseLinearTest(unittest.TestCase):
                   return_value=self.mock_group),
             patch("vllm_ascend.distributed.parallel_state.get_mlp_tp_group",
                   return_value=self.mock_group),
-            patch("vllm_ascend.ops.linear.get_tp_group",
+            patch("vllm_ascend.ops.linear_op.get_tp_group",
                   return_value=self.mock_group),
             patch("vllm_ascend.utils.mlp_tp_enable", return_value=True),
             patch("vllm_ascend.utils.oproj_tp_enable", return_value=True)
@@ -56,8 +55,7 @@ class TestAscendRowParallelLinear(BaseLinearTest):
             output_size=8,
             prefix="down_proj",
         )
-        self.assertEqual(linear.comm_group, parallel_state._MLP_TP)
-        self.assertEqual(linear.forward_type, "mlp_tp")
+        self.assertEqual(linear.custom_op.comm_group, parallel_state._MLP_TP)

         input_tensor = torch.randn(16, 8)
         linear(input_tensor)
@@ -71,34 +69,23 @@ class TestAscendRowParallelLinear(BaseLinearTest):
             output_size=8,
             prefix="o_proj",
         )
-        self.assertEqual(linear.comm_group, parallel_state._OTP)
-        self.assertEqual(linear.forward_type, "oproj_tp")
+        self.assertEqual(linear.custom_op.comm_group, parallel_state._OTP)

         input_tensor = torch.randn(16, 8)
         linear(input_tensor)


-class TestAscendColumnParallelLinear(BaseLinearTest):
-
-    def test_mlp_tp_init(self):
-        linear = AscendColumnParallelLinear(
-            input_size=16,
-            output_size=8,
-            prefix="down_proj",
-        )
-        self.assertEqual(linear.comm_group, parallel_state._MLP_TP)
-
-
 class TestAscendMergedColumnParallelLinear(BaseLinearTest):

     def test_merged_mlp_tp_init(self):
         os.environ["VLLM_ASCEND_ENABLE_MLP_OPTIMIZE"] = "1"

         linear = AscendMergedColumnParallelLinear(
             input_size=16,
             output_sizes=[8, 8],
             prefix="gate_up_proj",
         )
-        self.assertEqual(linear.comm_group, parallel_state._MLP_TP)
-        self.assertEqual(linear.forward_type, "mlp_tp")
+        self.assertEqual(linear.custom_op.comm_group, parallel_state._MLP_TP)


 if __name__ == '__main__':
Reference in New Issue
Block a user