[Refactor] [MoE] Rename moe-related classes & files (#3646)

### What this PR does / why we need it?

1. Rename `common_fused_moe.py` to `fused_moe.py`.
2. Rename `fused_moe_prepare_and_finalize.py` / `FusedMoEPrepareAndFinalize` to `prepare_finalize.py` / `PrepareAndFinalize`.
3. Rename `vllm_ascend/ops/moe` to `vllm_ascend/ops/fused_moe`.
4. Move `vllm_ascend/ops/fused_moe.py` to `vllm_ascend/ops/fused_moe/fused_moe.py`.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

e2e & ut

- vLLM version: v0.11.0rc3
- vLLM main: 17c540a993

Signed-off-by: Pr0Wh1teGivee <calvin_zhu0210@outlook.com>
2025-10-25 11:22:03 +08:00
parent 0637e8f021
commit 63c363d3de
25 changed files with 183 additions and 199 deletions
--- a/tests/e2e/singlecard/ops/test_fused_moe.py
+++ b/tests/e2e/singlecard/ops/test_fused_moe.py
@@ -28,9 +28,10 @@ import torch
 import torch_npu
 from vllm.model_executor.layers.activation import SiluAndMul

-from vllm_ascend.ops.moe.experts_selector import select_experts
-from vllm_ascend.ops.moe.moe_mlp import unified_apply_mlp
-from vllm_ascend.ops.moe.token_dispatcher import TokenDispatcherWithAllGather
+from vllm_ascend.ops.fused_moe.experts_selector import select_experts
+from vllm_ascend.ops.fused_moe.moe_mlp import unified_apply_mlp
+from vllm_ascend.ops.fused_moe.token_dispatcher import \
+    TokenDispatcherWithAllGather

 NUM_EXPERTS = [8, 64]
 EP_SIZE = [1]
@@ -182,7 +183,7 @@ def test_token_dispatcher_with_all_gather_quant(
 ):
    context_mock = MagicMock()
    context_mock.fused_moe_state = 0
-    with patch("vllm_ascend.ops.moe.moe_mlp.get_forward_context",
+    with patch("vllm_ascend.ops.fused_moe.moe_mlp.get_forward_context",
               return_value=context_mock):
        a = torch.randn((m, k), device=device, dtype=dtype) / 10
        w1 = torch.randn((e, k, 2 * n), device=device, dtype=torch.int8)
@@ -282,9 +283,9 @@ def test_select_experts(
                                 dtype=torch.int32)
        custom_routing_function.return_value = (mock_weights, mock_ids)

-    with patch("vllm_ascend.ops.moe.experts_selector._native_grouped_topk"
+    with patch("vllm_ascend.ops.fused_moe.experts_selector._native_grouped_topk"
               ) as mock_native_grouped_topk, \
-            patch('vllm_ascend.ops.moe.experts_selector.get_forward_context',
+            patch('vllm_ascend.ops.fused_moe.experts_selector.get_forward_context',
                  return_value=MagicMock(weight_prefetch_method=MagicMock())):
        mock_native_grouped_topk.side_effect = lambda x, num_groups, k: torch.randn_like(
            x)
@@ -318,7 +319,7 @@ def test_select_experts(

@pytest.mark.parametrize("device", DEVICE)
 def test_select_experts_invalid_scoring_func(device: str):
-    with patch('vllm_ascend.ops.moe.experts_selector.get_forward_context',
+    with patch('vllm_ascend.ops.fused_moe.experts_selector.get_forward_context',
                  return_value=MagicMock(weight_prefetch_method=MagicMock())), \
            pytest.raises(ValueError,
                       match="Unsupported scoring function: invalid"):