[main] adapt usage of npu_moe_gating_top_k_softmax and remove envs.SELECT_GATING_TOPK_SOTFMAX_EXPERTS (#2112)

Backport of the v0.9.1-dev change:
https://github.com/vllm-project/vllm-ascend/pull/1902

Original change on main that introduced npu_moe_gating_top_k_softmax:
https://github.com/vllm-project/vllm-ascend/pull/1355

- vLLM version: v0.10.0
- vLLM main: 055bd3978e
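
For reference, the gating this change standardizes on can be approximated in plain PyTorch as a softmax over the router logits followed by a top-k selection. The sketch below illustrates that semantics only; it is not the fused torch_npu kernel (which, as used in vllm-ascend, additionally returns a row-index tensor for token expansion). Shapes and names are illustrative.

import torch

def gating_top_k_softmax_reference(router_logits: torch.Tensor, k: int):
    """Plain-PyTorch approximation of softmax-then-top-k MoE gating."""
    # Normalize router logits into per-token expert probabilities.
    scores = torch.softmax(router_logits, dim=-1)
    # Keep the k highest-probability experts (weights and ids) per token.
    topk_weights, topk_ids = torch.topk(scores, k, dim=-1)
    return topk_weights, topk_ids

# Example: 8 tokens routed over 8 experts with top-2 selection, matching
# the (8, 8) router_logits shape used in the test below.
weights, ids = gating_top_k_softmax_reference(torch.randn(8, 8), k=2)
assert weights.shape == (8, 2) and ids.shape == (8, 2)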

Signed-off-by: huangxialu <huangxialu1@huawei.com>
commit 9c9a7cd90b (parent e8660d7978)
Author: huangxialu
Date: 2025-07-31 21:05:56 +08:00
5 changed files with 146 additions and 89 deletions

@@ -297,9 +297,8 @@ class TestAscendUnquantizedFusedMoEMethod:
         assert not layer.w13_weight.requires_grad
         assert not layer.w2_weight.requires_grad
 
-    @pytest.mark.parametrize(
-        "others_param",
-        [[256, 4, False], [128, 1, False], [128, 1, True], [128, 4, False]])
+    @pytest.mark.parametrize("others_param",
+                             [[256, 4], [128, 1], [128, 1], [128, 4]])
     def test_apply_without_expert_map(self, moe_method, mock_dist_env,
                                       mock_moe_env, others_param):
         """
@@ -308,15 +307,13 @@ class TestAscendUnquantizedFusedMoEMethod:
         3 test use select_gating_topk_softmax_experts and fused_experts
         4 test use select_experts and fused_experts_with_all2all_buffer
         """
-        global_num_experts, ep_size, select_softmax = others_param
+        global_num_experts, ep_size = others_param
         is_prefill = False
         is_deepseek_v3_r1 = global_num_experts == 256
         forward_context = MagicMock(fused_moe_state=get_fused_moe_state(
             ep_size, is_prefill, is_deepseek_v3_r1))
-        with patch(
-                "vllm_ascend.ops.fused_moe.SELECT_GATING_TOPK_SOTFMAX_EXPERTS",
-                select_softmax), \
-            patch("vllm_ascend.ops.fused_moe.get_forward_context", return_value=forward_context):
+        with patch("vllm_ascend.ops.fused_moe.get_forward_context",
+                   return_value=forward_context):
             moe_method.ep_size = ep_size
             x = torch.randn(8, 2, 2)
             router_logits = torch.randn(8, 8)
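
With envs.SELECT_GATING_TOPK_SOTFMAX_EXPERTS removed, the test only needs to patch get_forward_context. Below is a self-contained sketch of the simplified pattern; the stub modules exist solely so the example runs without vllm_ascend installed (the real suite patches the actual module), and the test name and mocked state are illustrative.

import sys
import types
from unittest.mock import MagicMock, patch

import pytest
import torch

# Stand-in modules so the patch target resolves in this sketch; the real
# test suite imports the actual vllm_ascend.ops.fused_moe module instead.
pkg = types.ModuleType("vllm_ascend")
ops = types.ModuleType("vllm_ascend.ops")
fused_moe = types.ModuleType("vllm_ascend.ops.fused_moe")
fused_moe.get_forward_context = lambda: None
pkg.ops, ops.fused_moe = ops, fused_moe
sys.modules.update({"vllm_ascend": pkg, "vllm_ascend.ops": ops,
                    "vllm_ascend.ops.fused_moe": fused_moe})

@pytest.mark.parametrize("others_param",
                         [[256, 4], [128, 1], [128, 1], [128, 4]])
def test_apply_without_expert_map_sketch(others_param):
    # The select_softmax flag is gone, so only two values unpack now.
    global_num_experts, ep_size = others_param
    forward_context = MagicMock(fused_moe_state="mocked_state")
    with patch("vllm_ascend.ops.fused_moe.get_forward_context",
               return_value=forward_context):
        # Inputs mirror the shapes used in the hunk above.
        x = torch.randn(8, 2, 2)
        router_logits = torch.randn(8, 8)
        # The real test would call moe_method.apply(...) here; this sketch
        # just verifies the single remaining patch is in effect.
        assert fused_moe.get_forward_context() is forward_context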