Refactor tensor_parallel and comm_utils (#2814)
### What this PR does / why we need it?
1. Move `ops/comm_utils` to `ops/moe/comm_utils`.
2. Move `distributed/tensor_parallel/gather_from_sequence_parallel_region` to `ops/moe/comm_utils`.
3. Delete `distributed/tensor_parallel`; call sites only need the import-path update shown in the sketch below.
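A minimal sketch of the import change (the "before" paths are the ones removed or moved by this PR; this is not a complete list of exports):

```python
# Before this refactor (module paths removed/moved by this PR):
# from vllm_ascend.distributed.tensor_parallel import gather_from_sequence_parallel_region
# from vllm_ascend.ops.comm_utils import async_all_to_all

# After this refactor, both helpers live under ops/moe/comm_utils:
from vllm_ascend.ops.moe.comm_utils import (async_all_to_all,
                                            gather_from_sequence_parallel_region)
```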
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
E2E and unit tests.
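The new unit tests exercise `async_all_to_all` through its three-value return. Below is a minimal usage sketch under the assumption that the third return value is the asynchronous work handle from `torch.distributed`; the `exchange` helper and its arguments are illustrative only:

```python
import torch
import torch.distributed as dist

from vllm_ascend.ops.moe.comm_utils import async_all_to_all


def exchange(tokens: torch.Tensor, group: dist.ProcessGroup) -> torch.Tensor:
    # With both split-size arguments set to None the output keeps the input's
    # shape; otherwise its first dimension becomes sum(output_split_sizes).
    _, a2a_out, handle = async_all_to_all(tokens, None, None, group)
    # Assumption: the handle is the async work object, so wait before reading.
    handle.wait()
    return a2a_out
```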
- vLLM version: main
- vLLM main: a1213fae5f
---------
Signed-off-by: wuweiqiang24 <1005334931@qq.com>
Signed-off-by: wuweiqiang24 <wuweiqiang11@huawei.com>
@@ -1,139 +0,0 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# Copyright 2023 The vLLM team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.

import importlib

import pytest
import torch
from pytest_mock import MockerFixture

from tests.ut.base import PytestBase
from vllm_ascend.distributed.tensor_parallel import (
    _gather_along_first_dim, _gather_along_last_dim,
    _reduce_scatter_along_first_dim, _reduce_scatter_along_last_dim,
    all_to_all_hp2sp, all_to_all_sp2hp)


class TestDistributedCommunication(PytestBase):

    @pytest.fixture(autouse=True)
    def context(self, mocker: MockerFixture):
        mocker.patch("torch.npu.current_device", return_value="cpu")
        mocker.patch("torch.distributed.get_world_size", return_value=4)

        mocker.patch("torch.distributed.get_rank", return_value=0)

    @pytest.mark.parametrize("world_size, test_tensor, expected",
                             [(1, torch.randn(8, 16), (8, 16)),
                              (4, torch.randn(8, 16), (32, 16))])
    def test_gather_along_first_dim(self, test_tensor, expected, world_size,
                                    mocker: MockerFixture):
        """test _gather_along_first_dim"""
        mocker.patch("torch.distributed.get_world_size",
                     return_value=world_size)

        result = _gather_along_first_dim(test_tensor, mocker.MagicMock())

        assert result.shape == expected

    @pytest.mark.parametrize("test_tensor, output_split_sizes, expected", [
        (torch.randn(8, 16), [5, 10, 15, 2], (32, 16)),
    ])
    def test_gather_along_first_dim_unequal_split(self, test_tensor, expected,
                                                  output_split_sizes,
                                                  mocker: MockerFixture):
        """test _gather_along_first_dim"""

        result = _gather_along_first_dim(test_tensor, mocker.MagicMock(),
                                         output_split_sizes)

        assert result.shape == expected

    @pytest.mark.parametrize("world_size, test_tensor, expected",
                             [(1, torch.randn(8, 16, 32), (8, 16, 32)),
                              (4, torch.randn(8, 16, 32), (8, 16, 32 * 4))])
    def test_gather_along_last_dim(self, test_tensor, expected, world_size,
                                   mocker: MockerFixture):
        """test _gather_along_last_dim"""
        mocker.patch("torch.distributed.get_world_size",
                     return_value=world_size)

        result = _gather_along_last_dim(test_tensor, mocker.MagicMock())

        assert result.shape == expected

    @pytest.mark.parametrize("input_shape,expected_shape", [
        ((32, 16), (8, 16)),
        ((40, 10), (10, 10)),
    ])
    def test_reduce_scatter_along_first_dim(self, input_shape, expected_shape,
                                            mocker: MockerFixture):
        input_tensor = torch.randn(*input_shape)
        result = _reduce_scatter_along_first_dim(input_tensor,
                                                 mocker.MagicMock())
        assert result.shape == expected_shape

    @pytest.mark.parametrize("input_shape,expected_shape", [
        ((8, 16, 32), (8, 16, 8)),
    ])
    def test_reduce_scatter_along_last_dim(self, input_shape, expected_shape,
                                           mocker: MockerFixture):
        input_tensor = torch.randn(*input_shape)
        result = _reduce_scatter_along_last_dim(input_tensor,
                                                mocker.MagicMock())
        assert result.shape == expected_shape

    @pytest.mark.parametrize("func,input_shape,expected_shape", [
        ("all_gather_last_dim_from_tensor_parallel_region", (8, 16, 32),
         (8, 16, 128)),
        ("reduce_scatter_to_sequence_parallel_region", (32, 16), (8, 16)),
        ("reduce_scatter_last_dim_to_tensor_parallel_region", (8, 16, 32),
         (8, 16, 8)),
        ("gather_from_sequence_parallel_region", (8, 16), (32, 16)),
    ])
    def test_wrapper_functions(self, func, input_shape, expected_shape,
                               mocker: MockerFixture):
        """test wrapper funcs"""
        mod = importlib.import_module(
            'vllm_ascend.distributed.tensor_parallel')
        globals = mod.__dict__
        test_func = globals[func]
        input_tensor = torch.randn(*input_shape)
        result = test_func(input_tensor, mocker.MagicMock())
        assert result.shape == expected_shape

    @pytest.mark.parametrize(
        "input_shape,output_shape",
        [
            ((8, 16), (32, 4)),  # [num_tokens/TP, H] -> [num_tokens, H/TP]
        ])
    def test_all_to_all_sp2hp(self, input_shape, output_shape,
                              mocker: MockerFixture):
        input_tensor = torch.randn(*input_shape)
        result = all_to_all_sp2hp(input_tensor, mocker.MagicMock())
        assert result.shape == output_shape

    @pytest.mark.parametrize(
        "input_shape,output_shape",
        [
            ((32, 4), (8, 16)),  # [num_tokens, H/TP] -> [num_tokens/TP, H]
        ])
    def test_all_to_all_hp2sp(self, input_shape, output_shape,
                              mocker: MockerFixture):
        input_tensor = torch.randn(*input_shape)
        result = all_to_all_hp2sp(input_tensor, mocker.MagicMock())
        assert result.shape == output_shape
tests/ut/ops/test_comm_utils.py (new file, 98 lines)
@@ -0,0 +1,98 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# Copyright 2023 The vLLM team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.

import pytest
import torch
from pytest_mock import MockerFixture

from tests.ut.base import PytestBase
from vllm_ascend.ops.moe.comm_utils import (
    _gather_along_first_dim, async_all_to_all,
    gather_from_sequence_parallel_region)


class TestDistributedCommunication(PytestBase):

    @pytest.fixture(autouse=True)
    def context(self, mocker: MockerFixture):
        mocker.patch("torch.npu.current_device", return_value="cpu")
        mocker.patch("torch.distributed.get_world_size", return_value=4)

        mocker.patch("torch.distributed.get_rank", return_value=0)

    @pytest.mark.parametrize(
        "input_tensor, output_split_sizes, input_split_sizes",
        [(torch.randn(8, 16), [2, 2, 2, 2], [2, 2, 2, 2]),
         (torch.randn(16, 32), None, None)])
    def test_async_all_to_all(self, input_tensor, output_split_sizes,
                              input_split_sizes, mocker: MockerFixture):
        """Test async_all_to_all"""
        mock_group = mocker.MagicMock()
        mocker.patch("torch.distributed.all_to_all_single",
                     return_value=mocker.MagicMock())

        _, a2a_out, handle = async_all_to_all(input_tensor, output_split_sizes,
                                              input_split_sizes, mock_group)

        # Check if the output tensor is created properly
        if output_split_sizes is None:
            assert a2a_out.shape == input_tensor.shape
        else:
            total_output_size = sum(output_split_sizes)
            expected_shape = [total_output_size] + list(
                input_tensor.size())[1:]
            assert a2a_out.shape == torch.Size(expected_shape)

        # Ensure handle is returned from async operation
        assert handle is not None
        assert isinstance(handle, mocker.MagicMock)

    @pytest.mark.parametrize("world_size, test_tensor, expected",
                             [(1, torch.randn(8, 16), (8, 16)),
                              (4, torch.randn(8, 16), (32, 16))])
    def test_gather_along_first_dim(self, test_tensor, expected, world_size,
                                    mocker: MockerFixture):
        """Test _gather_along_first_dim"""
        mocker.patch("torch.distributed.get_world_size",
                     return_value=world_size)

        result = _gather_along_first_dim(test_tensor, mocker.MagicMock())

        assert result.shape == expected

    @pytest.mark.parametrize("input_tensor, output_split_sizes",
                             [(torch.randn(8, 16), None),
                              (torch.randn(8, 16), [2, 2, 2, 2])])
    def test_gather_from_sequence_parallel_region(self, input_tensor,
                                                  output_split_sizes,
                                                  mocker: MockerFixture):
        """Test gather_from_sequence_parallel_region"""
        mock_group = mocker.MagicMock()

        result = gather_from_sequence_parallel_region(input_tensor, mock_group,
                                                      output_split_sizes)

        # If output_split_sizes is not provided, result should have expanded first dimension by world size
        if output_split_sizes is None:
            expected_shape = [input_tensor.shape[0] * 4] + list(
                input_tensor.shape[1:])
            assert result.shape == torch.Size(expected_shape)
        else:
            # If output_split_sizes is provided, result shape is dictated by sum of output_split_sizes
            expected_shape = [sum(output_split_sizes)] + list(
                input_tensor.shape[1:])
            assert result.shape == torch.Size(expected_shape)
@@ -348,7 +348,7 @@ class TestTokenDispatcherWithAll2AllV(TestBase):
         self.mock_npu_moe_token_unpermute.return_value = torch.randn(8, 16)
 
         # Mock async_all_to_all
-        patcher6 = patch('vllm_ascend.ops.comm_utils.async_all_to_all')
+        patcher6 = patch('vllm_ascend.ops.moe.comm_utils.async_all_to_all')
         self.mock_async_all_to_all = patcher6.start()
         self.addCleanup(patcher6.stop)
         self.mock_async_all_to_all.return_value = (None, torch.randn(16, 16),