[EPLB][Bugfix] Get expert map from layers (#5817)

### What this PR does / why we need it?
The EPLB module initialized `expert_map` with its own logic, which differed from the initialization used by the `fused_moe` module, so the two copies of the map could disagree at runtime. This PR deletes the EPLB module's own `expert_map` initialization and instead reads the expert map directly from the MoE layers, so both modules share a single, consistent map.
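For reference, an `expert_map` translates logical (global) expert IDs to the local slots a rank hosts, with `-1` marking experts that are not on the rank. Below is a minimal sketch of the read-it-from-the-layers approach this PR adopts; `model.layers`, `mlp.experts`, and `get_map()` are hypothetical names used for illustration, not the actual vllm-ascend attributes:

```python
import torch

def collect_expert_maps(model, num_moe_layers: int) -> torch.Tensor:
    """Read per-layer expert maps from the fused MoE layers themselves,
    instead of re-deriving them inside the EPLB module.

    NOTE: `model.layers`, `mlp.experts`, and `get_map()` are placeholder
    names; the real vllm-ascend attributes differ.
    """
    maps = []
    for layer in model.layers[-num_moe_layers:]:  # trailing MoE layers
        # Each fused MoE layer already holds the authoritative map
        # (shape: [global_num_experts]; -1 => expert not hosted locally).
        maps.append(layer.mlp.experts.get_map())
    return torch.stack(maps)  # [num_moe_layers, global_num_experts]
```

With a single source of truth, the layer's `self._expert_map` and the EPLB module's gathered `expert_maps` can no longer drift apart.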

#### Before the bugfix
On rank 1, the expert map held by the MoE layer and the map gathered by the EPLB module disagreed:

```
self._expert_map=tensor([64, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63], device='npu:1', dtype=torch.int32)

self.shared_dict["expert_maps"][0]=tensor([-1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64], dtype=torch.int32)
```
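The two dumps disagree: the layer-side `self._expert_map` carries a stray `64` at position 0 and a stray `1` in the middle of the `-1` block, and its block of valid IDs is shifted relative to the gathered map. A quick helper for spotting such divergence (a hypothetical sketch, not part of the codebase):

```python
import torch

def diff_expert_maps(a: torch.Tensor, b: torch.Tensor) -> None:
    """Print every position where two expert maps disagree."""
    if a.numel() != b.numel():
        print(f"length mismatch: {a.numel()} vs {b.numel()}")
    n = min(a.numel(), b.numel())
    for idx in (a[:n] != b[:n]).nonzero(as_tuple=True)[0].tolist():
        print(f"position {idx}: {a[idx].item()} vs {b[idx].item()}")
```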

### How was this patch tested?

#### Qwen3-235B-w8a8 on AIME2024
| dataset | version | metric | mode | vllm-api-general-chat |
| ----- | ----- | ----- | ----- | ----- |
| aime2024 | 604a78 | accuracy | gen | 86.67 |

- vLLM version: v0.13.0
- vLLM main: 2f4e6548ef

Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
Authored by LI SHENGYONG on 2026-01-14 09:16:51 +08:00; committed by GitHub.
Commit ecf2fa482e (parent 48ec97821a): 7 changed files with 23 additions and 173 deletions.

Changes to the EPLB adaptor tests (the `get_init_expert_map` hook and its test are removed):

```diff
@@ -12,9 +12,6 @@ class DummyAdaptor(EplbAdaptor):
     def get_rank_expert_workload(self):
         return "workload"
 
-    def get_init_expert_map(self, num_moe_layers):
-        return {"layers": num_moe_layers}
-
     def do_update_expert_map(self, layer_id, updated_expert_map):
         return {"layer_id": layer_id, "map": updated_expert_map}
 
@@ -31,8 +28,6 @@ def test_base_class_methods_raise():
     adaptor = EplbAdaptor()
     with pytest.raises(NotImplementedError):
         adaptor.get_rank_expert_workload()
-    with pytest.raises(NotImplementedError):
-        adaptor.get_init_expert_map(1)
     with pytest.raises(NotImplementedError):
         adaptor.do_update_expert_map(1, {})
     with pytest.raises(NotImplementedError):
@@ -50,13 +45,6 @@ def test_get_rank_expert_workload():
     assert result == "workload"
 
-
-def test_get_init_expert_map():
-    adaptor = DummyAdaptor()
-    result = adaptor.get_init_expert_map(5)
-    assert isinstance(result, dict)
-    assert result["layers"] == 5
-
 
 def test_do_update_expert_map():
     adaptor = DummyAdaptor()
     updated = {"expert": 1}
```
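With `get_init_expert_map` gone, the adaptor interface shrinks accordingly. A rough sketch of the reduced base class, inferred from the raise-`NotImplementedError` checks in these tests (the hunk context shows at least one more method being checked, so this is not exhaustive):

```python
class EplbAdaptor:
    """Base EPLB adaptor after this PR: the expert map is read from the
    MoE layers, so no get_init_expert_map() hook is required."""

    def get_rank_expert_workload(self):
        raise NotImplementedError

    def do_update_expert_map(self, layer_id, updated_expert_map):
        raise NotImplementedError
```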

Changes to the Ascend config tests (assertions now index the expert map by rank):

```diff
@@ -32,13 +32,14 @@ class TestAscendConfig(unittest.TestCase):
         self.moe_config = moe_config
         self.mock_npu = patch("torch.Tensor.npu",
                               new=lambda self: self).start()
+        self.rank = 1
 
     def test_init_eplb_config_with_eplb(self):
         expert_map, log2phy, redundant_experts = init_eplb_config(
             self.ascend_config, 0, self.moe_config)
         gt_expert_map = torch.tensor([4, -1, -1, -1, 0, 1, 2, 3])
         gt_log2phy = torch.tensor([9, 1, 2, 3, 5, 6, 7, 8])
-        self.assertTrue(torch.equal(expert_map, gt_expert_map))
+        self.assertTrue(torch.equal(expert_map[self.rank], gt_expert_map))
         self.assertTrue(torch.equal(log2phy, gt_log2phy))
         self.assertEqual(redundant_experts, 2)
 
@@ -49,7 +50,7 @@ class TestAscendConfig(unittest.TestCase):
         expert_map, log2phy, redundant_experts = init_eplb_config(
             self.ascend_config, 0, self.moe_config)
         gt_expert_map = torch.tensor([-1, 1, 4, -1, 2, -1, 0, 3])
         gt_log2phy = torch.tensor([2, 6, 9, 3, 7, 4, 5, 8])
-        self.assertTrue(torch.equal(expert_map, gt_expert_map))
+        self.assertTrue(torch.equal(expert_map[self.rank], gt_expert_map))
         self.assertTrue(torch.equal(log2phy, gt_log2phy))
         self.assertEqual(redundant_experts, 2)
@@ -60,7 +61,7 @@ class TestAscendConfig(unittest.TestCase):
         expert_map, log2phy, redundant_experts = init_eplb_config(
             self.ascend_config, 0, self.moe_config)
         gt_expert_map = torch.tensor([-1, -1, -1, -1, 0, 1, 2, 3])
         print(expert_map, log2phy, redundant_experts)
-        self.assertTrue(torch.equal(expert_map, gt_expert_map))
+        self.assertTrue(torch.equal(expert_map[self.rank], gt_expert_map))
         self.assertEqual(redundant_experts, 0)
```
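The switch from `expert_map` to `expert_map[self.rank]` in every assertion suggests that `init_eplb_config` now returns the full per-rank expert-map table rather than a single rank's slice, and each test compares only the row for `self.rank = 1`. A minimal sketch of that shape relationship (the rank-0 row is made up for illustration; the rank-1 row mirrors `gt_expert_map` from the first test):

```python
import torch

# Hypothetical full table: one row per EP rank, one column per logical expert.
expert_map = torch.tensor([
    [0, 1, 2, 3, -1, -1, -1, 4],   # rank 0: illustrative values only
    [4, -1, -1, -1, 0, 1, 2, 3],   # rank 1: gt_expert_map in the first test
])
rank = 1
local_map = expert_map[rank]  # the slice each assertion now checks
assert torch.equal(local_map, torch.tensor([4, -1, -1, -1, 0, 1, 2, 3]))
```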