[EPLB][Bugfix] Get expert map from layers (#5817)

### What this PR does / why we need it?

The eplb module initialized `expert_map` with its own method, which differs from the one used by the fused_moe module, so the two modules could end up with different expert maps (see the dump below). This PR deletes the expert_map initialization method used by the eplb module and gets the expert map from the layers instead, so that both modules use the same initialization.

#### Before bugfix

```
self._expert_map=tensor([64, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63], device='npu:1', dtype=torch.int32)

self.shared_dict["expert_maps"][0]=tensor([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64]], dtype=torch.int32)
```

### How was this patch tested?

#### qwen3-235B-w8a8 aime

| dataset | version | metric | mode | vllm-api-general-chat |
|----- | ----- | ----- | ----- | -----|
| aime2024 | 604a78 | accuracy | gen | 86.67 |

- vLLM version: v0.13.0
- vLLM main: 2f4e6548ef

Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
2026-01-14 09:16:51 +08:00
parent 48ec97821a
commit ecf2fa482e
7 changed files with 23 additions and 173 deletions
--- a/tests/ut/eplb/adaptor/test_abstract_adaptor.py
+++ b/tests/ut/eplb/adaptor/test_abstract_adaptor.py
@@ -12,9 +12,6 @@ class DummyAdaptor(EplbAdaptor):
    def get_rank_expert_workload(self):
        return "workload"

-    def get_init_expert_map(self, num_moe_layers):
-        return {"layers": num_moe_layers}
-
    def do_update_expert_map(self, layer_id, updated_expert_map):
        return {"layer_id": layer_id, "map": updated_expert_map}

@@ -31,8 +28,6 @@ def test_base_class_methods_raise():
    adaptor = EplbAdaptor()
    with pytest.raises(NotImplementedError):
        adaptor.get_rank_expert_workload()
-    with pytest.raises(NotImplementedError):
-        adaptor.get_init_expert_map(1)
    with pytest.raises(NotImplementedError):
        adaptor.do_update_expert_map(1, {})
    with pytest.raises(NotImplementedError):
@@ -50,13 +45,6 @@ def test_get_rank_expert_workload():
    assert result == "workload"


-def test_get_init_expert_map():
-    adaptor = DummyAdaptor()
-    result = adaptor.get_init_expert_map(5)
-    assert isinstance(result, dict)
-    assert result["layers"] == 5
-
-
 def test_do_update_expert_map():
    adaptor = DummyAdaptor()
    updated = {"expert": 1}
--- a/tests/ut/eplb/core/test_eplb_utils.py
+++ b/tests/ut/eplb/core/test_eplb_utils.py
@@ -32,13 +32,14 @@ class TestAscendConfig(unittest.TestCase):
        self.moe_config = moe_config
        self.mock_npu = patch("torch.Tensor.npu",
                              new=lambda self: self).start()
+        self.rank = 1

    def test_init_eplb_config_with_eplb(self):
        expert_map, log2phy, redundant_experts = init_eplb_config(
            self.ascend_config, 0, self.moe_config)
        gt_expert_map = torch.tensor([4, -1, -1, -1, 0, 1, 2, 3])
        gt_log2phy = torch.tensor([9, 1, 2, 3, 5, 6, 7, 8])
-        self.assertTrue(torch.equal(expert_map, gt_expert_map))
+        self.assertTrue(torch.equal(expert_map[self.rank], gt_expert_map))
        self.assertTrue(torch.equal(log2phy, gt_log2phy))
        self.assertEqual(redundant_experts, 2)

@@ -49,7 +50,7 @@ class TestAscendConfig(unittest.TestCase):
            self.ascend_config, 0, self.moe_config)
        gt_expert_map = torch.tensor([-1, 1, 4, -1, 2, -1, 0, 3])
        gt_log2phy = torch.tensor([2, 6, 9, 3, 7, 4, 5, 8])
-        self.assertTrue(torch.equal(expert_map, gt_expert_map))
+        self.assertTrue(torch.equal(expert_map[self.rank], gt_expert_map))
        self.assertTrue(torch.equal(log2phy, gt_log2phy))
        self.assertEqual(redundant_experts, 2)

@@ -60,7 +61,7 @@ class TestAscendConfig(unittest.TestCase):
            self.ascend_config, 0, self.moe_config)
        gt_expert_map = torch.tensor([-1, -1, -1, -1, 0, 1, 2, 3])
        print(expert_map, log2phy, redundant_experts)
-        self.assertTrue(torch.equal(expert_map, gt_expert_map))
+        self.assertTrue(torch.equal(expert_map[self.rank], gt_expert_map))
        self.assertEqual(redundant_experts, 0)