[Refactor] [MoE] Rename moe-related classes & files (#3646)

### What this PR does / why we need it?
1. Rename common_fused_moe.py to fused_moe.py.
2. Rename fused_moe_prepare_and_finalize.py / FusedMoEPrepareAndFinalize
to prepare_finalize.py / PrepareAndFinalize.
3. Rename vllm_ascend/ops/moe to vllm_ascend/ops/fused_moe.
4. Move vllm_ascend/ops/fused_moe.py to
vllm_ascend/ops/fused_moe/fused_moe.py.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
End-to-end (e2e) tests and unit tests (ut).

- vLLM version: v0.11.0rc3
- vLLM main: 17c540a993

Signed-off-by: Pr0Wh1teGivee <calvin_zhu0210@outlook.com>
This commit is contained in:
weichen
2025-10-25 11:22:03 +08:00
committed by GitHub
parent 0637e8f021
commit 63c363d3de
25 changed files with 183 additions and 199 deletions

View File

@@ -5,8 +5,8 @@ import torch
from tests.ut.base import TestBase
from vllm_ascend.attention.attention_v1 import AscendAttentionState
from vllm_ascend.ops.moe.experts_selector import (_native_grouped_topk,
select_experts)
from vllm_ascend.ops.fused_moe.experts_selector import (_native_grouped_topk,
select_experts)
from vllm_ascend.quantization.w8a8 import (AscendC8KVCacheMethod,
AscendW8A8FusedMoEMethod,
AscendW8A8LinearMethod,
@@ -758,7 +758,7 @@ class TestSelectExperts(TestBase):
self.mock_ctx = MagicMock()
self.mock_ctx.weight_prefetch_method = MagicMock()
patcher = patch(
'vllm_ascend.ops.moe.experts_selector.get_forward_context',
'vllm_ascend.ops.fused_moe.experts_selector.get_forward_context',
return_value=self.mock_ctx)
self.addCleanup(patcher.stop)
patcher.start()
@@ -831,7 +831,7 @@ class TestSelectExperts(TestBase):
self.assertEqual(ids.shape, (self.num_tokens, self.top_k))
self.assertEqual(ids.dtype, torch.int32)
@patch('vllm_ascend.ops.moe.experts_selector._native_grouped_topk')
@patch('vllm_ascend.ops.fused_moe.experts_selector._native_grouped_topk')
def test_grouped_topk_with_correction_bias(self, mock_grouped_topk):
"""Test grouped topk with expert score correction bias"""
mock_grouped_topk.return_value = torch.ones(self.num_tokens,