refactor linear (#2867)

### What this PR does / why we need it?
The current `linear.py` has two issues:

- There is redundant conditional logic in the `comm_group` and `forward`
selection for classes such as `AscendMergedColumnParallelLinear`.

- The `comm_group` selection logic is inconsistent across
`AscendMergedColumnParallelLinear`, `AscendColumnParallelLinear`, and
`AscendQKVParallelLinear`.

To address these two issues, this PR encapsulates `comm_group` and
`forward` into op classes and extracts the class-selection logic into
common functions. Adding a custom communication group or forward method
in the future only requires extending `CustomColumnParallelOp` or
`CustomRowParallelOp` and adding the corresponding selection logic; a
sketch of the pattern follows below.
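
A minimal, self-contained sketch of this structure, assuming hypothetical names for everything except `CustomColumnParallelOp` (introduced by this PR) and the `vllm_ascend.ops.linear_op` module (referenced by the updated tests); the real implementation differs in detail:

```python
# Illustrative sketch only: CustomColumnParallelOp is named in this PR, but the
# concrete subclass and the selection helper below are hypothetical.
from abc import ABC, abstractmethod


class CustomColumnParallelOp(ABC):
    """Bundles a communication group with the matching forward implementation."""

    def __init__(self, layer):
        self.layer = layer

    @property
    @abstractmethod
    def comm_group(self):
        """Return the process group this op communicates over."""

    @abstractmethod
    def apply(self, input_):
        """Run the column-parallel forward pass for this op."""


class MLPColumnParallelOp(CustomColumnParallelOp):
    """Hypothetical op that routes gate_up_proj through the MLP TP group."""

    @property
    def comm_group(self):
        return "mlp_tp"  # placeholder for the real MLP TP group object

    def apply(self, input_):
        # Delegate to the layer's existing forward implementation in this sketch.
        return self.layer.forward_impl(input_)


def get_column_parallel_op(prefix, layer):
    """Shared selection logic: pick an op class from the layer prefix/config."""
    if prefix.endswith("gate_up_proj"):
        return MLPColumnParallelOp(layer)
    return None  # no custom op -> keep vLLM's default forward path
```

With this structure the Ascend linear classes only hold a `custom_op` attribute and delegate to it when it is set, which is also what the updated unit tests below assert via `linear.custom_op.comm_group`.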

### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Updated the existing unit tests for the Ascend linear layers (see the test diffs below).

- vLLM version: v0.10.2
- vLLM main: dd39baf717

---------

Signed-off-by: realliujiaxu <realliujiaxu@163.com>
Co-authored-by: weijinqian0 <weijinqian@huawei.com>
Author: realliujiaxu
Date: 2025-09-18 14:09:19 +08:00 (committed by GitHub)
Parent: a7f8ed38ed
Commit: af2a886814
4 changed files with 695 additions and 510 deletions


@@ -295,7 +295,7 @@ class TestAscendQwen2_5_VisionTransformer(PytestBase):
         mock_group.rank_in_group = 0
         mock_group.world_size = 2
         mocker.patch(
-            "vllm_ascend.ops.linear.get_tp_group",
+            "vllm_ascend.ops.linear_op.get_tp_group",
            return_value=mock_group,
        )


@@ -7,8 +7,7 @@ import torch

 from vllm_ascend import ascend_config
 from vllm_ascend.distributed import parallel_state
-from vllm_ascend.ops.linear import (AscendColumnParallelLinear,
-                                    AscendMergedColumnParallelLinear,
+from vllm_ascend.ops.linear import (AscendMergedColumnParallelLinear,
                                     AscendRowParallelLinear)
@@ -32,7 +31,7 @@ class BaseLinearTest(unittest.TestCase):
                   return_value=self.mock_group),
             patch("vllm_ascend.distributed.parallel_state.get_mlp_tp_group",
                   return_value=self.mock_group),
-            patch("vllm_ascend.ops.linear.get_tp_group",
+            patch("vllm_ascend.ops.linear_op.get_tp_group",
                   return_value=self.mock_group),
             patch("vllm_ascend.utils.mlp_tp_enable", return_value=True),
             patch("vllm_ascend.utils.oproj_tp_enable", return_value=True)
@@ -56,8 +55,7 @@ class TestAscendRowParallelLinear(BaseLinearTest):
             output_size=8,
             prefix="down_proj",
         )
-        self.assertEqual(linear.comm_group, parallel_state._MLP_TP)
-        self.assertEqual(linear.forward_type, "mlp_tp")
+        self.assertEqual(linear.custom_op.comm_group, parallel_state._MLP_TP)

         input_tensor = torch.randn(16, 8)
         linear(input_tensor)
@@ -71,34 +69,23 @@ class TestAscendRowParallelLinear(BaseLinearTest):
             output_size=8,
             prefix="o_proj",
         )
-        self.assertEqual(linear.comm_group, parallel_state._OTP)
-        self.assertEqual(linear.forward_type, "oproj_tp")
+        self.assertEqual(linear.custom_op.comm_group, parallel_state._OTP)

         input_tensor = torch.randn(16, 8)
         linear(input_tensor)

-class TestAscendColumnParallelLinear(BaseLinearTest):
-
-    def test_mlp_tp_init(self):
-        linear = AscendColumnParallelLinear(
-            input_size=16,
-            output_size=8,
-            prefix="down_proj",
-        )
-        self.assertEqual(linear.comm_group, parallel_state._MLP_TP)
-
-
 class TestAscendMergedColumnParallelLinear(BaseLinearTest):

     def test_merged_mlp_tp_init(self):
         os.environ["VLLM_ASCEND_ENABLE_MLP_OPTIMIZE"] = "1"
         linear = AscendMergedColumnParallelLinear(
             input_size=16,
             output_sizes=[8, 8],
             prefix="gate_up_proj",
         )
-        self.assertEqual(linear.comm_group, parallel_state._MLP_TP)
-        self.assertEqual(linear.forward_type, "mlp_tp")
+        self.assertEqual(linear.custom_op.comm_group, parallel_state._MLP_TP)


 if __name__ == '__main__':