[Refactor] Adjustments to moe_comm_method selection process (#3001)
### What this PR does / why we need it?
Fixes the issues raised in
https://github.com/vllm-project/vllm-ascend/pull/2791 and applies some
minor refactoring:
1. Use an Enum (`MoECommType`) instead of raw strings (a sketch follows this list).
2. Avoid setting a new property on forward_context in
AscendFusedMoE.forward().
3. Enable TokenDispatcherWithMoge.
4. Remove redundant code.
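For illustration, a minimal sketch of what enum-based selection can look like; the member `MC2` comes from the diff below, while the other member, the registry, and the helper are assumptions rather than the actual vllm-ascend code:

```python
# Sketch only: replacing string keys like "mc2commimpl" with an Enum so
# that typos fail loudly at lookup time instead of silently matching nothing.
from enum import Enum


class MoECommType(Enum):
    ALLGATHER = "allgather"  # assumed member, for illustration
    MC2 = "mc2"


# Hypothetical registry mapping enum members to comm-method implementations.
COMM_IMPLS = {
    MoECommType.ALLGATHER: "AllGatherCommImpl",
    MoECommType.MC2: "MC2CommImpl",
}


def select_comm_impl(comm_type: MoECommType) -> str:
    # A KeyError here points directly at the unsupported enum member,
    # unlike a mistyped string comparison that quietly evaluates to False.
    return COMM_IMPLS[comm_type]
```

Storing `moe_comm_type` on the forward context when it is created, rather than attaching a new attribute inside `AscendFusedMoE.forward()`, also keeps the context effectively read-only during the forward pass.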
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Tested with Qwen3-30B-A3B, Qwen3-30B-A3B-W8A8, DeepSeek-V3-W4A8-Pruing, deepseek-mtp, and pangu-pro-moe-pruing under:
1. EP enabled and disabled
2. Aclgraph and eager modes
- vLLM version: v0.10.2
- vLLM main: 9607d5eb44
Signed-off-by: Pr0Wh1teGivee <calvin_zhu0210@outlook.com>
Co-authored-by: weijinqian0 <12153182+weijinqian0@users.noreply.github.com>
Excerpt of the unit-test diff:
```diff
@@ -23,6 +23,7 @@ from pytest_mock import MockerFixture
 from vllm.model_executor.layers.fused_moe import FusedMoEMethodBase
 
 from tests.ut.base import TestBase
+from vllm_ascend.ascend_forward_context import MoECommType
 from vllm_ascend.ops.fused_moe import (AscendFusedMoE,
                                        AscendUnquantizedFusedMoEMethod)
 from vllm_ascend.ops.moe.experts_selector import select_experts
@@ -55,6 +56,26 @@ def mock_npu_format_cast(weight_data, format):
     return weight_data
 
 
+@pytest.fixture(autouse=True)
+def setup_vllm_config_mock(mocker: MockerFixture):
+    mock_hf_config = MagicMock()
+    mock_hf_config.model_type = "llama"
+
+    mock_model_config = MagicMock()
+    mock_model_config.hf_config = mock_hf_config
+
+    mock_vllm_config = MagicMock()
+    mock_vllm_config.model_config = mock_model_config
+    mock_vllm_config.parallel_config = MagicMock(tensor_parallel_size=2)
+    mock_vllm_config.scheduler_config = MagicMock(max_num_seqs=4)
+    mock_vllm_config.model_config.max_model_len = 2048
+
+    mocker.patch('vllm_ascend.ops.fused_moe.get_current_vllm_config',
+                 return_value=mock_vllm_config)
+    mocker.patch('vllm_ascend.ops.moe.moe_comm_method.get_current_vllm_config',
+                 return_value=mock_vllm_config)
+
+
 @pytest.fixture
 def mock_dist_env(mocker: MockerFixture):
     mock_moe_comm_method = MagicMock()
```
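Because the fixture is `autouse=True`, every test in the module runs with `get_current_vllm_config` already patched, which is what makes the inline `get_current_vllm_config` patch removed in a later hunk redundant. A hedged sketch of what a test can then rely on (the test name and assertions are illustrative, not from this PR):

```python
# Hypothetical test: setup_vllm_config_mock is applied automatically, so the
# patched get_current_vllm_config already returns the mocked config here.
def test_sees_mocked_vllm_config():
    from vllm_ascend.ops.fused_moe import get_current_vllm_config

    config = get_current_vllm_config()
    assert config.parallel_config.tensor_parallel_size == 2
    assert config.scheduler_config.max_num_seqs == 4
```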
```diff
@@ -74,7 +95,7 @@ def mock_dist_env(mocker: MockerFixture):
 
     mock_forward_context_obj = MagicMock(
         moe_comm_method=mock_moe_comm_method,
-        moe_comm_method_name="mc2commimpl",
+        moe_comm_type=MoECommType.MC2,
         max_tokens_across_dp=10,
         dp_metadata=MagicMock(cu_tokens_across_dp_cpu=[5, 10]),
         mc2_mask=torch.zeros(16, dtype=torch.bool),
@@ -104,12 +125,6 @@ def mock_dist_env(mocker: MockerFixture):
         return_value=mock_forward_context_obj), \
         patch('vllm_ascend.ops.moe.fused_moe_prepare_and_finalize.get_forward_context',
               return_value=mock_forward_context_obj), \
-        patch('vllm_ascend.ops.fused_moe.get_current_vllm_config',
-              return_value=MagicMock(
-                  parallel_config=MagicMock(tensor_parallel_size=2),
-                  scheduler_config=MagicMock(max_num_seqs=4),
-                  model_config=MagicMock(max_model_len=2048)
-              )), \
         patch("vllm_ascend.utils.get_ascend_soc_version", return_value=AscendSocVersion.A3), \
         patch('vllm_ascend.ops.moe.moe_mlp.get_forward_context',
               return_value=mock_forward_context_obj), \
@@ -501,7 +516,7 @@ class TestUnifiedApplyMLP(TestBase):
                 mock_get_forward_context):
 
         mock_forward_context = MagicMock()
-        mock_forward_context.moe_comm_method_name = "mc2commimpl"
+        mock_forward_context.moe_comm_type = MoECommType.MC2
         mock_get_forward_context.return_value = mock_forward_context
 
         mock_is_310p.return_value = False
```
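With the forward context exposing `moe_comm_type`, checks that previously compared `moe_comm_method_name` against `"mc2commimpl"` can compare enum members instead; a minimal sketch (the helper function is hypothetical, only `MoECommType.MC2` and the attribute name come from this diff):

```python
# Sketch only: enum comparison instead of the old string check.
from vllm_ascend.ascend_forward_context import MoECommType


def is_mc2(forward_context) -> bool:
    # Equivalent in spirit to the removed
    # `moe_comm_method_name == "mc2commimpl"` comparison.
    return forward_context.moe_comm_type == MoECommType.MC2
```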