[bugfix] Change log2phy map to NPU (#3339)
### What this PR does / why we need it?
Resolves an EPLB failure that occurred when using MTP rotation position encoding. The failure was caused by the log2phy map changing as a result of device-type modifications; the map is now explicitly moved to the NPU.

### Does this PR introduce any user-facing change?

### How was this patch tested?
https://github.com/vllm-project/vllm/commit/releases/v0.11.0

- vLLM version: v0.11.0

---------

Signed-off-by: offline0806 <3337230449@qq.com>
Co-authored-by: offline0806 <3337230449@qq.com>
This commit is contained in:
@@ -56,7 +56,8 @@ def mock_dist_env(mocker: MockerFixture):
|
|||||||
# init dist env patch
|
# init dist env patch
|
||||||
dp_metadata = MagicMock(num_tokens_across_dp_cpu=[5, 5])
|
dp_metadata = MagicMock(num_tokens_across_dp_cpu=[5, 5])
|
||||||
|
|
||||||
with patch('torch.distributed.get_rank', return_value=0), \
|
with patch('torch.npu.is_available', return_value=True), \
|
||||||
|
patch('torch.distributed.get_rank', return_value=0), \
|
||||||
patch('torch.distributed.get_world_size', return_value=4), \
|
patch('torch.distributed.get_world_size', return_value=4), \
|
||||||
patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_ep_group', return_value=mock_ep_and_mc2_group(mocker)), \
|
patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_ep_group', return_value=mock_ep_and_mc2_group(mocker)), \
|
||||||
patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_mc2_group', return_value=mock_ep_and_mc2_group(mocker)), \
|
patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_mc2_group', return_value=mock_ep_and_mc2_group(mocker)), \
|
||||||
@@ -208,6 +209,7 @@ class MockFusedMoEMethod(FusedMoEMethodBase):
|
|||||||
|
|
||||||
class TestTorchairAscendFusedMoe:
|
class TestTorchairAscendFusedMoe:
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
def test_init_no_quant(self, mock_dist_env, default_moe_config):
|
def test_init_no_quant(self, mock_dist_env, default_moe_config):
|
||||||
layer = TorchairAscendFusedMoE(**default_moe_config)
|
layer = TorchairAscendFusedMoE(**default_moe_config)
|
||||||
|
|
||||||
@@ -237,6 +239,7 @@ class TestTorchairAscendFusedMoe:
|
|||||||
error_config['scoring_func'] = "random"
|
error_config['scoring_func'] = "random"
|
||||||
layer = TorchairAscendFusedMoE(**error_config)
|
layer = TorchairAscendFusedMoE(**error_config)
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
def test_init_with_quant(self, mock_dist_env, default_moe_config):
|
def test_init_with_quant(self, mock_dist_env, default_moe_config):
|
||||||
mock_quant_config = MagicMock()
|
mock_quant_config = MagicMock()
|
||||||
mock_quant_method = MockFusedMoEMethod()
|
mock_quant_method = MockFusedMoEMethod()
|
||||||
@@ -248,6 +251,7 @@ class TestTorchairAscendFusedMoe:
|
|||||||
assert moe.quant_method is not None
|
assert moe.quant_method is not None
|
||||||
assert isinstance(moe.quant_method, AscendFusedMoEMethod)
|
assert isinstance(moe.quant_method, AscendFusedMoEMethod)
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
def test_init_with_mixed_quant(self, mock_dist_env, default_moe_config):
|
def test_init_with_mixed_quant(self, mock_dist_env, default_moe_config):
|
||||||
mock_quant_config = MagicMock()
|
mock_quant_config = MagicMock()
|
||||||
mock_quant_method = MockFusedMoEMethod()
|
mock_quant_method = MockFusedMoEMethod()
|
||||||
@@ -261,6 +265,7 @@ class TestTorchairAscendFusedMoe:
|
|||||||
assert isinstance(moe.quant_method,
|
assert isinstance(moe.quant_method,
|
||||||
TorchairAscendUnquantizedFusedMoEMethod)
|
TorchairAscendUnquantizedFusedMoEMethod)
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"others_param",
|
"others_param",
|
||||||
[[None,
|
[[None,
|
||||||
@@ -306,6 +311,7 @@ class TestTorchairAscendFusedMoe:
|
|||||||
else:
|
else:
|
||||||
assert output.shape == (num_tokens, 32)
|
assert output.shape == (num_tokens, 32)
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
def test_forward_ms_fused_moe_comp(self, mock_dist_env,
|
def test_forward_ms_fused_moe_comp(self, mock_dist_env,
|
||||||
default_moe_config):
|
default_moe_config):
|
||||||
inputs = torch.randn(5, 32)
|
inputs = torch.randn(5, 32)
|
||||||
|
|||||||
@@ -204,7 +204,7 @@ class AscendFusedMoE(FusedMoE):
|
|||||||
self.global_redundant_expert_num)
|
self.global_redundant_expert_num)
|
||||||
self.log2phy = determine_default_log2phy_map(
|
self.log2phy = determine_default_log2phy_map(
|
||||||
self.global_num_experts, self.ep_size, self.ep_rank,
|
self.global_num_experts, self.ep_size, self.ep_rank,
|
||||||
self.global_redundant_expert_num)
|
self.global_redundant_expert_num).npu()
|
||||||
local_num_experts = (torch.sum(
|
local_num_experts = (torch.sum(
|
||||||
self.expert_map != -1) if self.expert_map is not None else
|
self.expert_map != -1) if self.expert_map is not None else
|
||||||
self.global_num_experts)
|
self.global_num_experts)
|
||||||
|
|||||||
@@ -1045,7 +1045,7 @@ class TorchairAscendFusedMoE(FusedMoE):
|
|||||||
self.global_redundant_expert_num)
|
self.global_redundant_expert_num)
|
||||||
self.log2phy = determine_default_log2phy_map(
|
self.log2phy = determine_default_log2phy_map(
|
||||||
self.global_num_experts, self.ep_size, self.ep_rank,
|
self.global_num_experts, self.ep_size, self.ep_rank,
|
||||||
self.global_redundant_expert_num)
|
self.global_redundant_expert_num).npu()
|
||||||
local_num_experts = (torch.sum(self.expert_map != -1)
|
local_num_experts = (torch.sum(self.expert_map != -1)
|
||||||
if self.expert_map is not None else num_experts)
|
if self.expert_map is not None else num_experts)
|
||||||
if self.dynamic_eplb:
|
if self.dynamic_eplb:
|
||||||
|
|||||||
Reference in New Issue
Block a user