[bugfix]change log2phy map to npu (#3339)
### What this PR does / why we need it? Fixes an EPLB failure: when using MTP with rotary position embedding, the log2phy map's device type could change, breaking EPLB. This PR moves the log2phy map onto the NPU explicitly. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? https://github.com/vllm-project/vllm/commit/releases/v0.11.0 - vLLM version: v0.11.0 --------- Signed-off-by: offline0806 <3337230449@qq.com> Co-authored-by: offline0806 <3337230449@qq.com>
This commit is contained in:
@@ -56,7 +56,8 @@ def mock_dist_env(mocker: MockerFixture):
|
||||
# init dist env patch
|
||||
dp_metadata = MagicMock(num_tokens_across_dp_cpu=[5, 5])
|
||||
|
||||
with patch('torch.distributed.get_rank', return_value=0), \
|
||||
with patch('torch.npu.is_available', return_value=True), \
|
||||
patch('torch.distributed.get_rank', return_value=0), \
|
||||
patch('torch.distributed.get_world_size', return_value=4), \
|
||||
patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_ep_group', return_value=mock_ep_and_mc2_group(mocker)), \
|
||||
patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_mc2_group', return_value=mock_ep_and_mc2_group(mocker)), \
|
||||
@@ -208,6 +209,7 @@ class MockFusedMoEMethod(FusedMoEMethodBase):
|
||||
|
||||
class TestTorchairAscendFusedMoe:
|
||||
|
||||
@pytest.fixture
|
||||
def test_init_no_quant(self, mock_dist_env, default_moe_config):
|
||||
layer = TorchairAscendFusedMoE(**default_moe_config)
|
||||
|
||||
@@ -237,6 +239,7 @@ class TestTorchairAscendFusedMoe:
|
||||
error_config['scoring_func'] = "random"
|
||||
layer = TorchairAscendFusedMoE(**error_config)
|
||||
|
||||
@pytest.fixture
|
||||
def test_init_with_quant(self, mock_dist_env, default_moe_config):
|
||||
mock_quant_config = MagicMock()
|
||||
mock_quant_method = MockFusedMoEMethod()
|
||||
@@ -248,6 +251,7 @@ class TestTorchairAscendFusedMoe:
|
||||
assert moe.quant_method is not None
|
||||
assert isinstance(moe.quant_method, AscendFusedMoEMethod)
|
||||
|
||||
@pytest.fixture
|
||||
def test_init_with_mixed_quant(self, mock_dist_env, default_moe_config):
|
||||
mock_quant_config = MagicMock()
|
||||
mock_quant_method = MockFusedMoEMethod()
|
||||
@@ -261,6 +265,7 @@ class TestTorchairAscendFusedMoe:
|
||||
assert isinstance(moe.quant_method,
|
||||
TorchairAscendUnquantizedFusedMoEMethod)
|
||||
|
||||
@pytest.fixture
|
||||
@pytest.mark.parametrize(
|
||||
"others_param",
|
||||
[[None,
|
||||
@@ -306,6 +311,7 @@ class TestTorchairAscendFusedMoe:
|
||||
else:
|
||||
assert output.shape == (num_tokens, 32)
|
||||
|
||||
@pytest.fixture
|
||||
def test_forward_ms_fused_moe_comp(self, mock_dist_env,
|
||||
default_moe_config):
|
||||
inputs = torch.randn(5, 32)
|
||||
|
||||
@@ -204,7 +204,7 @@ class AscendFusedMoE(FusedMoE):
|
||||
self.global_redundant_expert_num)
|
||||
self.log2phy = determine_default_log2phy_map(
|
||||
self.global_num_experts, self.ep_size, self.ep_rank,
|
||||
self.global_redundant_expert_num)
|
||||
self.global_redundant_expert_num).npu()
|
||||
local_num_experts = (torch.sum(
|
||||
self.expert_map != -1) if self.expert_map is not None else
|
||||
self.global_num_experts)
|
||||
|
||||
@@ -1045,7 +1045,7 @@ class TorchairAscendFusedMoE(FusedMoE):
|
||||
self.global_redundant_expert_num)
|
||||
self.log2phy = determine_default_log2phy_map(
|
||||
self.global_num_experts, self.ep_size, self.ep_rank,
|
||||
self.global_redundant_expert_num)
|
||||
self.global_redundant_expert_num).npu()
|
||||
local_num_experts = (torch.sum(self.expert_map != -1)
|
||||
if self.expert_map is not None else num_experts)
|
||||
if self.dynamic_eplb:
|
||||
|
||||
Reference in New Issue
Block a user