[Refactor][MoE] remove redundant code after refactoring fused_moe (#2612)

### What this PR does / why we need it? There is a lot of redundant MoE-related code here, and the structure is not very clear. We did the following: moved the relatively independent code related to apply_mlp into a separate file; removed the alltoall_buffer and alltoall_seq environment variables; removed the code related to alltoall_buffer and alltoall_seq, and retained the sole TokenDispatcher subclass. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? e2e&ut - vLLM version: v0.10.1.1 - vLLM main: 4071c76cf3 --------- Signed-off-by: Pr0Wh1teGivee <calvin_zhu0210@outlook.com> Signed-off-by: weijinqian_v1 <weijinqian@huawei.com> Co-authored-by: weijinqian0 <12153182+weijinqian0@users.noreply.github.com>
2025-08-30 22:28:50 +08:00
parent 20ae71291d
commit 3a5fc5ee01
13 changed files with 417 additions and 1237 deletions
--- a/tests/ut/ops/test_common_fused_moe.py
+++ b/tests/ut/ops/test_common_fused_moe.py
@@ -0,0 +1,69 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+from unittest.mock import patch
+
+import torch
+
+from tests.ut.base import TestBase
+from vllm_ascend.ops.common_fused_moe import fused_experts_moge
+
+
+class TestFusedExpertsMoGE(TestBase):
+    """Shape check for ``fused_experts_moge`` with torch_npu ops mocked."""
+
+    def test_fused_experts_moge(self):
+        """Feed a 4x128 input with top-1 routing; expect a 4x128 output."""
+
+        def fake_grouped_matmul(x, weight, **kwargs):
+            # One random tensor shaped from the first input and weight.
+            return [torch.randn(x[0].shape[0], weight[0].shape[1])]
+
+        def fake_swiglu(x):
+            # Identity stand-in for the activation.
+            return x
+
+        # Attributes exposed by the stand-in parallel config object.
+        parallel_attrs = {
+            'ep_size': 1, 'tp_size': 1, 'dp_size': 1,
+            'tp_rank': 0, 'dp_rank': 0, 'ep_rank': 0,
+            'use_ep': True,
+        }
+
+        with patch('torch_npu.npu_grouped_matmul') as mock_grouped_matmul, \
+             patch('torch_npu.npu_swiglu') as mock_swiglu, \
+             patch('vllm_ascend.utils.is_310p') as mock_is_310p:
+            mock_is_310p.return_value = False
+            mock_grouped_matmul.side_effect = fake_grouped_matmul
+            mock_swiglu.side_effect = fake_swiglu
+
+            tokens = torch.randn(4, 128)
+            first_weight = torch.randn(4, 256, 128)
+            second_weight = torch.randn(4, 128, 128)
+            routing_weights = torch.rand(4, 1)
+            expert_ids = torch.tensor([[0], [1], [2], [3]], dtype=torch.long)
+
+            output = fused_experts_moge(
+                hidden_states=tokens,
+                w1=first_weight,
+                w2=second_weight,
+                moe_parallel_config=type('MockConfig', (), parallel_attrs)(),
+                topk_weights=routing_weights,
+                topk_ids=expert_ids,
+                top_k=1,
+                global_num_experts=4,
+                apply_router_weight_on_input=True,
+            )
+
+            self.assertEqual(output.shape, (4, 128))