[Refactor] [MoE] Rename moe-related classes & files (#3646)

### What this PR does / why we need it?
1. Rename common_fused_moe.py to fused_moe.py.
2. Rename fused_moe_prepare_and_finalize.py / FusedMoEPrepareAndFinalize
to prepare_finalize.py / PrepareAndFinalize.
3. Rename vllm_ascend/ops/moe to vllm_ascend/ops/fused_moe.
4. Move vllm_ascend/ops/fused_moe.py to
vllm_ascend/ops/fused_moe/fused_moe.py
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
e2e & ut

- vLLM version: v0.11.0rc3
- vLLM main:
17c540a993

Signed-off-by: Pr0Wh1teGivee <calvin_zhu0210@outlook.com>
This commit is contained in:
weichen
2025-10-25 11:22:03 +08:00
committed by GitHub
parent 0637e8f021
commit 63c363d3de
25 changed files with 183 additions and 199 deletions

View File

@@ -28,9 +28,10 @@ import torch
import torch_npu
from vllm.model_executor.layers.activation import SiluAndMul
-from vllm_ascend.ops.moe.experts_selector import select_experts
-from vllm_ascend.ops.moe.moe_mlp import unified_apply_mlp
-from vllm_ascend.ops.moe.token_dispatcher import TokenDispatcherWithAllGather
+from vllm_ascend.ops.fused_moe.experts_selector import select_experts
+from vllm_ascend.ops.fused_moe.moe_mlp import unified_apply_mlp
+from vllm_ascend.ops.fused_moe.token_dispatcher import \
+    TokenDispatcherWithAllGather
NUM_EXPERTS = [8, 64]
EP_SIZE = [1]
@@ -182,7 +183,7 @@ def test_token_dispatcher_with_all_gather_quant(
):
context_mock = MagicMock()
context_mock.fused_moe_state = 0
-with patch("vllm_ascend.ops.moe.moe_mlp.get_forward_context",
+with patch("vllm_ascend.ops.fused_moe.moe_mlp.get_forward_context",
return_value=context_mock):
a = torch.randn((m, k), device=device, dtype=dtype) / 10
w1 = torch.randn((e, k, 2 * n), device=device, dtype=torch.int8)
@@ -282,9 +283,9 @@ def test_select_experts(
dtype=torch.int32)
custom_routing_function.return_value = (mock_weights, mock_ids)
-with patch("vllm_ascend.ops.moe.experts_selector._native_grouped_topk"
+with patch("vllm_ascend.ops.fused_moe.experts_selector._native_grouped_topk"
) as mock_native_grouped_topk, \
-patch('vllm_ascend.ops.moe.experts_selector.get_forward_context',
+patch('vllm_ascend.ops.fused_moe.experts_selector.get_forward_context',
return_value=MagicMock(weight_prefetch_method=MagicMock())):
mock_native_grouped_topk.side_effect = lambda x, num_groups, k: torch.randn_like(
x)
@@ -318,7 +319,7 @@ def test_select_experts(
@pytest.mark.parametrize("device", DEVICE)
def test_select_experts_invalid_scoring_func(device: str):
-with patch('vllm_ascend.ops.moe.experts_selector.get_forward_context',
+with patch('vllm_ascend.ops.fused_moe.experts_selector.get_forward_context',
return_value=MagicMock(weight_prefetch_method=MagicMock())), \
pytest.raises(ValueError,
match="Unsupported scoring function: invalid"):