[BugFix] Support redundant experts in EPLB (#3473)

This PR adds support for redundant experts in EPLB. Key points:
- Use global_num_experts = num_experts + num_redundant_experts consistently.
- Backward compatible when num_redundant_experts=0.

Tested on a 16-rank setup (W8A8) with static EPLB and expert_map_path, verifying the router logits shape and that requests complete successfully.

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: yechao237 <yechao20180411@gmail.com>
2025-10-18 00:09:16 +08:00
parent 07ca1b9b78
commit 4750d45d86
12 changed files with 23 additions and 35 deletions
--- a/tests/ut/eplb/core/test_eplb_utils.py
+++ b/tests/ut/eplb/core/test_eplb_utils.py
@@ -34,8 +34,8 @@ def test_determine_default_expert_map_multiple_worlds_with_redundant():
        rank_id=0,
        global_redundant_expert_num=1)

-    assert count == 3
-    assert torch.all(expert_map[0:3] >= 0)
+    assert count == 2
+    assert torch.all(expert_map[0:2] >= 0)


 def test_generate_log2phy_map_single_rank_holding():
--- a/tests/ut/ops/test_fused_ops.py
+++ b/tests/ut/ops/test_fused_ops.py
@@ -257,7 +257,7 @@ class MockFusedMoEMethod(FusedMoEMethodBase):

 class TestExpertsSelector:

-    @pytest.mark.parametrize("global_num_experts", [[256], [128]])
+    @pytest.mark.parametrize("global_num_experts", [256, 128])
    def test_select_experts(self, mock_dist_env, mock_moe_env,
                            global_num_experts):

--- a/tests/ut/torchair/ops/test_torchair_fused_moe.py
+++ b/tests/ut/torchair/ops/test_torchair_fused_moe.py
@@ -22,6 +22,7 @@ import torch_npu
 from pytest_mock import MockerFixture
 from vllm.model_executor.layers.fused_moe import FusedMoEMethodBase

+from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.ascend_forward_context import _get_fused_moe_state
 from vllm_ascend.quantization.quant_config import AscendFusedMoEMethod
 from vllm_ascend.torchair.ops.torchair_fused_moe import (
@@ -355,7 +356,9 @@ class TestTorchairAscendUnquantizedFusedMoEMethod:
        """
        global_num_experts, ep_size = others_param
        is_prefill = False
-        is_deepseek_v3_r1 = global_num_experts == 256
+        global_redundant_expert_num = get_ascend_config(
+        ).init_redundancy_expert
+        is_deepseek_v3_r1 = global_num_experts - global_redundant_expert_num == 256
        forward_context = MagicMock(fused_moe_state=_get_fused_moe_state(
            ep_size, is_prefill, is_deepseek_v3_r1))
        with patch(