diff --git a/tests/ut/torchair/ops/test_torchair_fused_moe.py b/tests/ut/torchair/ops/test_torchair_fused_moe.py index 70418a2..e8945d8 100644 --- a/tests/ut/torchair/ops/test_torchair_fused_moe.py +++ b/tests/ut/torchair/ops/test_torchair_fused_moe.py @@ -56,7 +56,8 @@ def mock_dist_env(mocker: MockerFixture): # init dist env patch dp_metadata = MagicMock(num_tokens_across_dp_cpu=[5, 5]) - with patch('torch.distributed.get_rank', return_value=0), \ + with patch('torch.npu.is_available', return_value=True), \ + patch('torch.distributed.get_rank', return_value=0), \ patch('torch.distributed.get_world_size', return_value=4), \ patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_ep_group', return_value=mock_ep_and_mc2_group(mocker)), \ patch('vllm_ascend.torchair.ops.torchair_fused_moe.get_mc2_group', return_value=mock_ep_and_mc2_group(mocker)), \ @@ -208,6 +209,7 @@ class MockFusedMoEMethod(FusedMoEMethodBase): class TestTorchairAscendFusedMoe: + @pytest.fixture def test_init_no_quant(self, mock_dist_env, default_moe_config): layer = TorchairAscendFusedMoE(**default_moe_config) @@ -237,6 +239,7 @@ class TestTorchairAscendFusedMoe: error_config['scoring_func'] = "random" layer = TorchairAscendFusedMoE(**error_config) + @pytest.fixture def test_init_with_quant(self, mock_dist_env, default_moe_config): mock_quant_config = MagicMock() mock_quant_method = MockFusedMoEMethod() @@ -248,6 +251,7 @@ class TestTorchairAscendFusedMoe: assert moe.quant_method is not None assert isinstance(moe.quant_method, AscendFusedMoEMethod) + @pytest.fixture def test_init_with_mixed_quant(self, mock_dist_env, default_moe_config): mock_quant_config = MagicMock() mock_quant_method = MockFusedMoEMethod() @@ -261,6 +265,7 @@ class TestTorchairAscendFusedMoe: assert isinstance(moe.quant_method, TorchairAscendUnquantizedFusedMoEMethod) + @pytest.fixture @pytest.mark.parametrize( "others_param", [[None, @@ -306,6 +311,7 @@ class TestTorchairAscendFusedMoe: else: assert output.shape == (num_tokens, 32) + @pytest.fixture def test_forward_ms_fused_moe_comp(self, mock_dist_env, default_moe_config): inputs = torch.randn(5, 32) diff --git a/vllm_ascend/ops/common_fused_moe.py b/vllm_ascend/ops/common_fused_moe.py index 6718135..c57146b 100644 --- a/vllm_ascend/ops/common_fused_moe.py +++ b/vllm_ascend/ops/common_fused_moe.py @@ -204,7 +204,7 @@ class AscendFusedMoE(FusedMoE): self.global_redundant_expert_num) self.log2phy = determine_default_log2phy_map( self.global_num_experts, self.ep_size, self.ep_rank, - self.global_redundant_expert_num) + self.global_redundant_expert_num).npu() local_num_experts = (torch.sum( self.expert_map != -1) if self.expert_map is not None else self.global_num_experts) diff --git a/vllm_ascend/torchair/ops/torchair_fused_moe.py b/vllm_ascend/torchair/ops/torchair_fused_moe.py index 9d232f8..3e63831 100644 --- a/vllm_ascend/torchair/ops/torchair_fused_moe.py +++ b/vllm_ascend/torchair/ops/torchair_fused_moe.py @@ -1045,7 +1045,7 @@ class TorchairAscendFusedMoE(FusedMoE): self.global_redundant_expert_num) self.log2phy = determine_default_log2phy_map( self.global_num_experts, self.ep_size, self.ep_rank, - self.global_redundant_expert_num) + self.global_redundant_expert_num).npu() local_num_experts = (torch.sum(self.expert_map != -1) if self.expert_map is not None else num_experts) if self.dynamic_eplb: