[EPLB][refactor] Modification of the initialization logic for expert_map and log2phy (depends on PR #5285) (#5311)
### What this PR does / why we need it?
Unify the loading logic for expert_map and log2phy.
1. The map generated when redundant experts are enabled is incorrect.
The community (vLLM) map-generation function only accepts the number of
global experts. When we pass in the number of logical experts plus
redundant experts, the local expert IDs of the last card index to expert
IDs that do not exist. Now we ensure that every index points to a real,
existing expert ID, and that each expert can be accessed. Moreover, when
redundant experts are not enabled, the output of our function remains
consistent with the community's function.
2. The map we generate is based on the number of physical experts,
but in reality, we only need the number of logical experts.
Later on, we will need to pad it accordingly, so we can simply generate
a map whose length is the number of logical experts.
3. Unify the initialization logic across different scenarios and
simplify the code for fused_moe.
**Before refactoring**
- map path is not None:
expert map: get_rank_placement_map from _'expert_load_balancer.py'_,
maintains the map for all ranks and all layers.
log2phy: get_rank_log2phy_map from _'expert_load_balancer.py'_,
maintains the map for all ranks and all layers.
- map path is None:
expert map: determine_expert_map from _'vllm.layer'_. The function does
not support the redundant experts of vllm-ascend.
log2phy: determine_default_log2phy_map from _'eplb_utils.py'_. The
function does not support the redundant experts of vllm-ascend.
**Refactoring**
eplb_utils.py
init_eplb_config
generate placement
generate expert map
generate log2phy
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Expert Mapping Test Generation:
ep size: 16, num of experts: 256, num of redundant experts: 16
+++++++++++++++++++++++++++++++++++++++++
Expert Mapping (a value other than -1 marks an expert this rank is responsible for)
for Rank 15:
vllm map:
[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 1 2 3 4 5 6 7 8
9 10 11 12 13 14 15 16]
+++++++++++++++++++++++++++++++++++++++++
Improved map:
[16 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
Expert Mapping Test Generation:
ep size: 16, num of experts: 256, num of redundant experts: 0
+++++++++++++++++++++++++++++++++++++++++
Expert Mapping (a value other than -1 marks an expert this rank is responsible for)
for Rank 15:
vllm map:
[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
+++++++++++++++++++++++++++++++++++++++
Improved map:
[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
dsr1 baseline:
| dataset | version | metric | mode | vllm-api-general-chat |
|----- | ----- | ----- | ----- | -----|
| gsm8k-lite | 7cd45e | accuracy | gen | 100.00 |
dsr1 eplb:
| dataset | version | metric | mode | vllm-api-general-chat |
|----- | ----- | ----- | ----- | -----|
| gsm8k-lite | 7cd45e | accuracy | gen | 100.00 |
- vLLM version: release/v0.13.0
- vLLM main:
5fbfa8d9ef
Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
Co-authored-by: weijinqian0 <1184188277@qq.com>
This commit is contained in:
17
tests/ut/eplb/core/expert_map.json
Normal file
17
tests/ut/eplb/core/expert_map.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"moe_layer_count":
|
||||
1,
|
||||
"layer_list": [{
|
||||
"layer_id":
|
||||
0,
|
||||
"device_count":
|
||||
2,
|
||||
"device_list": [{
|
||||
"device_id": 0,
|
||||
"device_expert": [7, 2, 0, 3, 5]
|
||||
}, {
|
||||
"device_id": 1,
|
||||
"device_expert": [6, 1, 4, 7, 2]
|
||||
}]
|
||||
}]
|
||||
}
|
||||
@@ -1,49 +1,67 @@
|
||||
import random
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
# isort: off
|
||||
import pytest
|
||||
import torch
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.model_executor.layers.fused_moe.config import (FusedMoEConfig,
|
||||
FusedMoEParallelConfig
|
||||
)
|
||||
|
||||
from vllm_ascend.eplb.core import eplb_utils
|
||||
from vllm_ascend.eplb.core.eplb_utils import EPLBParamUtils
|
||||
from vllm_ascend.ascend_config import init_ascend_config
|
||||
from vllm_ascend.eplb.core.eplb_utils import EPLBParamUtils, init_eplb_config
|
||||
# isort: on
|
||||
|
||||
|
||||
def test_generate_log2phy_map_single_rank_holding():
|
||||
class TestAscendConfig(unittest.TestCase):
|
||||
|
||||
expert_map = torch.tensor([[0, -1], [-1, 0]], dtype=torch.int32)
|
||||
log2phy_map = eplb_utils.generate_log2phy_map(expert_map)
|
||||
def setUp(self):
|
||||
vllm_config = VllmConfig()
|
||||
ascend_config = init_ascend_config(vllm_config)
|
||||
ascend_config.dynamic_eplb = True
|
||||
ascend_config.init_redundancy_expert = 2
|
||||
moe_parallel_config = FusedMoEParallelConfig(2, 0, 1, 2, 1, 1, 1, 1,
|
||||
True, "hccl")
|
||||
moe_config = FusedMoEConfig(8, 8, 8192, 5, moe_parallel_config,
|
||||
torch.float16)
|
||||
moe_config.supports_eplb = True
|
||||
self.ascend_config = ascend_config
|
||||
self.moe_config = moe_config
|
||||
self.mock_npu = patch("torch.Tensor.npu",
|
||||
new=lambda self: self).start()
|
||||
|
||||
assert torch.all(log2phy_map[:, 0] == log2phy_map[0, 0])
|
||||
assert torch.all(log2phy_map[:, 1] == log2phy_map[1, 1])
|
||||
def test_init_eplb_config_with_eplb(self):
|
||||
expert_map, log2phy, redundant_experts = init_eplb_config(
|
||||
self.ascend_config, 0, self.moe_config)
|
||||
gt_expert_map = torch.tensor([4, -1, -1, -1, 0, 1, 2, 3])
|
||||
gt_log2phy = torch.tensor([9, 1, 2, 3, 5, 6, 7, 8])
|
||||
self.assertTrue(torch.equal(expert_map, gt_expert_map))
|
||||
self.assertTrue(torch.equal(log2phy, gt_log2phy))
|
||||
self.assertEqual(redundant_experts, 2)
|
||||
|
||||
def test_init_eplb_config_with_eplb_withmap(self):
|
||||
_TEST_DIR = os.path.dirname(__file__)
|
||||
self.ascend_config.expert_map_path = _TEST_DIR + "/expert_map.json"
|
||||
expert_map, log2phy, redundant_experts = init_eplb_config(
|
||||
self.ascend_config, 0, self.moe_config)
|
||||
gt_expert_map = torch.tensor([-1, 1, 4, -1, 2, -1, 0, 3])
|
||||
gt_log2phy = torch.tensor([2, 6, 9, 3, 7, 4, 5, 8])
|
||||
self.assertTrue(torch.equal(expert_map, gt_expert_map))
|
||||
self.assertTrue(torch.equal(log2phy, gt_log2phy))
|
||||
self.assertEqual(redundant_experts, 2)
|
||||
|
||||
def test_generate_log2phy_map_multiple_rank_holding(monkeypatch):
|
||||
|
||||
expert_map = torch.tensor([[0], [0]], dtype=torch.int32)
|
||||
|
||||
monkeypatch.setattr(random, "choice", lambda x: x[0])
|
||||
|
||||
log2phy_map = eplb_utils.generate_log2phy_map(expert_map)
|
||||
|
||||
assert log2phy_map.shape == (2, 1)
|
||||
assert (log2phy_map >= 0).all()
|
||||
|
||||
|
||||
def test_determine_default_log2phy_map_world_size_1():
|
||||
log2phy = eplb_utils.determine_default_log2phy_map(global_expert_num=3,
|
||||
world_size=1,
|
||||
rank_id=0)
|
||||
assert log2phy.shape == (3, )
|
||||
assert (log2phy >= 0).all()
|
||||
|
||||
|
||||
def test_determine_default_log2phy_map_world_size_multiple():
|
||||
log2phy = eplb_utils.determine_default_log2phy_map(global_expert_num=6,
|
||||
world_size=2,
|
||||
rank_id=1)
|
||||
assert log2phy.shape == (6, )
|
||||
assert (log2phy >= 0).all()
|
||||
def test_init_eplb_config_without_eplb(self):
|
||||
self.ascend_config.dynamic_eplb = False
|
||||
self.ascend_config.expert_map_path = None
|
||||
expert_map, log2phy, redundant_experts = init_eplb_config(
|
||||
self.ascend_config, 0, self.moe_config)
|
||||
gt_expert_map = torch.tensor([-1, -1, -1, -1, 0, 1, 2, 3])
|
||||
print(expert_map, log2phy, redundant_experts)
|
||||
self.assertTrue(torch.equal(expert_map, gt_expert_map))
|
||||
self.assertEqual(redundant_experts, 0)
|
||||
|
||||
|
||||
class TestEPLBParamUtils:
|
||||
|
||||
Reference in New Issue
Block a user