[bugfix] change log2phy map to npu (#3339)

### What this PR does / why we need it?
Fixes an EPLB failure seen when using MTP with rotary position encoding:
the log2phy map could end up on the wrong device type, which broke EPLB.
The map returned by `determine_default_log2phy_map` is now explicitly
moved to the NPU device.

### Does this PR introduce any user-facing change?

### How was this patch tested?
https://github.com/vllm-project/vllm/commit/releases/v0.11.0


- vLLM version: v0.11.0

---------

Signed-off-by: offline0806 <3337230449@qq.com>
Co-authored-by: offline0806 <3337230449@qq.com>
This commit is contained in:
offline893
2025-10-10 08:47:55 +08:00
committed by GitHub
parent 55e23fabec
commit 1c2c72af8d
3 changed files with 9 additions and 3 deletions

View File

@@ -56,7 +56,8 @@ def mock_dist_env(mocker: MockerFixture):
# init dist env patch
dp_metadata = MagicMock(num_tokens_across_dp_cpu=[5, 5])
with patch('torch.distributed.get_rank', return_value=0), \
with patch('torch.npu.is_available', return_value=True), \
patch('torch.distributed.get_rank', return_value=0), \
patch('torch.distributed.get_world_size', return_value=4), \
patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_ep_group', return_value=mock_ep_and_mc2_group(mocker)), \
patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_mc2_group', return_value=mock_ep_and_mc2_group(mocker)), \
@@ -208,6 +209,7 @@ class MockFusedMoEMethod(FusedMoEMethodBase):
class TestTorchairAscendFusedMoe:
@pytest.fixture
def test_init_no_quant(self, mock_dist_env, default_moe_config):
layer = TorchairAscendFusedMoE(**default_moe_config)
@@ -237,6 +239,7 @@ class TestTorchairAscendFusedMoe:
error_config['scoring_func'] = "random"
layer = TorchairAscendFusedMoE(**error_config)
@pytest.fixture
def test_init_with_quant(self, mock_dist_env, default_moe_config):
mock_quant_config = MagicMock()
mock_quant_method = MockFusedMoEMethod()
@@ -248,6 +251,7 @@ class TestTorchairAscendFusedMoe:
assert moe.quant_method is not None
assert isinstance(moe.quant_method, AscendFusedMoEMethod)
@pytest.fixture
def test_init_with_mixed_quant(self, mock_dist_env, default_moe_config):
mock_quant_config = MagicMock()
mock_quant_method = MockFusedMoEMethod()
@@ -261,6 +265,7 @@ class TestTorchairAscendFusedMoe:
assert isinstance(moe.quant_method,
TorchairAscendUnquantizedFusedMoEMethod)
@pytest.fixture
@pytest.mark.parametrize(
"others_param",
[[None,
@@ -306,6 +311,7 @@ class TestTorchairAscendFusedMoe:
else:
assert output.shape == (num_tokens, 32)
@pytest.fixture
def test_forward_ms_fused_moe_comp(self, mock_dist_env,
default_moe_config):
inputs = torch.randn(5, 32)

View File

@@ -204,7 +204,7 @@ class AscendFusedMoE(FusedMoE):
self.global_redundant_expert_num)
self.log2phy = determine_default_log2phy_map(
self.global_num_experts, self.ep_size, self.ep_rank,
self.global_redundant_expert_num)
self.global_redundant_expert_num).npu()
local_num_experts = (torch.sum(
self.expert_map != -1) if self.expert_map is not None else
self.global_num_experts)

View File

@@ -1045,7 +1045,7 @@ class TorchairAscendFusedMoE(FusedMoE):
self.global_redundant_expert_num)
self.log2phy = determine_default_log2phy_map(
self.global_num_experts, self.ep_size, self.ep_rank,
self.global_redundant_expert_num)
self.global_redundant_expert_num).npu()
local_num_experts = (torch.sum(self.expert_map != -1)
if self.expert_map is not None else num_experts)
if self.dynamic_eplb: