# xc-llm-ascend/tests/ut/ops/test_moe_mlp.py

import unittest
from typing import ClassVar
from unittest.mock import patch

import torch

from vllm_ascend.ops.fused_moe.moe_mlp import cumsum_group_list, unified_apply_mlp
from vllm_ascend.ops.fused_moe.moe_runtime_args import (
    MoEMlpComputeInput,
    MoEQuantParams,
    MoEWeights,
)
from vllm_ascend.ops.fused_moe.moe_stage_params import MoEMxfpParams
from vllm_ascend.quantization.quant_type import QuantType


class TestCumsumGroupList(unittest.TestCase):
    """Unit tests for the group-list type conversions done by ``cumsum_group_list``.

    Fixtures are keyed by group-list type id (0, 1 or 2). In the fixtures below
    the type-0 tensor is the running sum of the type-1 tensor; the type-2 tensor
    is two-dimensional (NOTE(review): presumably ``[expert, count]`` rows --
    confirm against ``cumsum_group_list``).
    """

    # Fixture tensors keyed by group-list type id; filled in once by setUpClass.
    glist_dict: ClassVar[dict[int, torch.Tensor]]

    # (src_type, dst_type) pairs that must convert successfully.
    support_combine: ClassVar[list[tuple[int, int]]] = [(0, 0), (1, 0), (0, 1)]
    # (src_type, dst_type) pairs that must raise NotImplementedError.
    unsupported_combine: ClassVar[list[tuple[int, int]]] = [(0, 2), (2, 1), (1, 2)]

    @classmethod
    def setUpClass(cls):
        """Build the shared fixture tensors once for every test in the class."""
        cls.glist_dict = {
            0: torch.tensor([0, 2, 3, 3]),
            1: torch.tensor([0, 2, 1, 0]),
            2: torch.tensor([[1, 2], [2, 1], [0, 0], [0, 0]]),
        }

    def test_cumsum_group_list_supported_conversion(self):
        """Every supported pair must map the src fixture exactly onto the dst fixture."""
        for src_list_type, dst_list_type in self.support_combine:
            with self.subTest(src=src_list_type, dst=dst_list_type):
                expected = self.glist_dict[dst_list_type]
                result = cumsum_group_list(self.glist_dict[src_list_type], src_list_type, dst_list_type, expert_num=4)
                # A bare assertTrue would only report "False is not true";
                # include both tensors so a failing sub-test is diagnosable.
                self.assertTrue(
                    torch.equal(result, expected),
                    msg=f"expected {expected}, got {result}",
                )

    def test_cumsum_group_list_invalid_type_valueerror(self):
        """A source type outside 0, 1, 2 must raise ValueError with the documented message."""
        with self.assertRaises(ValueError) as excinfo:
            cumsum_group_list(self.glist_dict[0], 4, 0)
        self.assertIn("group_list_type should be in [0, 1, 2], but received", str(excinfo.exception))

    def test_cumsum_group_list_unsupported_conversion_notimplementederror(self):
        """Every unsupported pair must raise NotImplementedError with the documented message."""
        for src_list_type, dst_list_type in self.unsupported_combine:
            with self.subTest(src=src_list_type, dst=dst_list_type):
                with self.assertRaises(NotImplementedError) as excinfo:
                    # The type-0 fixture is passed for every pair, so this checks
                    # only how the (src, dst) pair is handled, not the payload layout.
                    cumsum_group_list(self.glist_dict[0], src_list_type, dst_list_type)
                self.assertIn("This feature is under development.", str(excinfo.exception))


class TestUnifiedApplyMlpRequest(unittest.TestCase):
    """Checks which backend ``unified_apply_mlp`` calls for a ``MoEMlpComputeInput``.

    Both backends (``unquant_apply_mlp`` and ``quant_apply_mlp``) are patched in
    the ``moe_mlp`` module, so these tests exercise only the routing and the
    keyword arguments that are forwarded, not any real MLP computation.
    """

    def test_request_unquant_path(self):
        """With ``QuantType.NONE`` only the unquantized backend is called."""
        hidden_states = torch.randn(2, 8)
        # Sentinel returned by the patched backend; checked by identity below.
        expected = torch.randn(2, 8)
        mlp_compute_input = MoEMlpComputeInput(
            hidden_states=hidden_states,
            group_list=torch.tensor([2, 2], dtype=torch.int64),
            group_list_type=1,
            dynamic_scale=None,
            topk_scales=None,
            weights=MoEWeights(
                w1=torch.randn(1, 16, 8),
                w2=torch.randn(1, 8, 8),
                w1_bias=torch.randn(1, 16),
                w2_bias=torch.randn(1, 8),
            ),
            quant=MoEQuantParams(quant_type=QuantType.NONE),
            fusion=False,
            activation="silu",
            need_trans=False,
            dynamic_eplb=False,
        )

        with (
            patch("vllm_ascend.ops.fused_moe.moe_mlp.unquant_apply_mlp", return_value=expected) as mock_unquant,
            patch("vllm_ascend.ops.fused_moe.moe_mlp.quant_apply_mlp") as mock_quant,
        ):
            output = unified_apply_mlp(mlp_compute_input=mlp_compute_input)

        # assertIs reports both operands on failure, unlike assertTrue(a is b).
        self.assertIs(output, expected)
        mock_unquant.assert_called_once()
        self.assertEqual(mock_unquant.call_args.kwargs["activation"], "silu")
        self.assertFalse(mock_unquant.call_args.kwargs["need_trans"])
        mock_quant.assert_not_called()

    def test_request_quant_path(self):
        """With ``QuantType.MXFP8`` only the quantized backend is called, with the MXFP flags."""
        hidden_states = torch.randn(2, 8)
        # Sentinel returned by the patched backend; checked by identity below.
        expected = torch.randn(2, 8)
        mlp_compute_input = MoEMlpComputeInput(
            hidden_states=hidden_states,
            group_list=torch.tensor([2, 2], dtype=torch.int64),
            group_list_type=1,
            dynamic_scale=torch.randn(2, 1),
            topk_scales=None,
            weights=MoEWeights(
                w1=torch.randn(1, 16, 8),
                w2=torch.randn(1, 8, 8),
                w1_scale=[torch.randn(1)],
                w2_scale=[torch.randn(1)],
            ),
            quant=MoEQuantParams(
                quant_type=QuantType.MXFP8,
                mxfp=MoEMxfpParams(
                    act_quant_type=torch.float8_e4m3fn,
                    weight_quant_type=torch.float8_e4m3fn,
                    use_bf16=False,
                ),
            ),
            fusion=True,
            activation="silu",
            need_trans=False,
            dynamic_eplb=True,
        )

        with (
            patch("vllm_ascend.ops.fused_moe.moe_mlp.quant_apply_mlp", return_value=expected) as mock_quant,
            patch("vllm_ascend.ops.fused_moe.moe_mlp.unquant_apply_mlp") as mock_unquant,
        ):
            output = unified_apply_mlp(mlp_compute_input=mlp_compute_input)

        # assertIs reports both operands on failure, unlike assertTrue(a is b).
        self.assertIs(output, expected)
        mock_quant.assert_called_once()
        # Flags from the request must reach the quantized backend as keyword arguments.
        quant_kwargs = mock_quant.call_args.kwargs
        self.assertTrue(quant_kwargs["use_mxfp_quant"])
        self.assertTrue(quant_kwargs["fusion"])
        self.assertTrue(quant_kwargs["dynamic_eplb"])
        self.assertFalse(quant_kwargs["use_bf16"])
        mock_unquant.assert_not_called()


# Allow running this test module directly as a script; verbosity=2 makes
# unittest print one result line per test.
if __name__ == "__main__":
    unittest.main(verbosity=2)
[UT]Ut for function cumsum_group_list in moe_mlp (ref #5025) (#5036) ### What this PR does / why we need it? Add ut for the cumsum_group_list function, which is related to the precision issues stemming from the moe_mlp.py . The ralated PR is https://github.com/vllm-project/vllm-ascend/pull/5025 ### Does this PR introduce _any_ user-facing change? No - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 --------- Signed-off-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com> Co-authored-by: weijinqian0 <1184188277@qq.com> 2025-12-18 15:00:16 +08:00			`import unittest`
			`from typing import ClassVar`
[refactor] replace scattered business kwargs with typed request objects and explicit stage boundaries (#7024) ### What this PR does / why we need it? Refactor `vllm_ascend/ops/fused_moe` to replace scattered MoE business `**kwargs` with typed request objects and explicit stage boundaries. - Prepare, dispatch, MLP, and quant stages now have clearer ownership. - Main MoE path no longer depends on business `kwargs.get(...)` lookups. - Comm and dispatcher interfaces are request-only on the main path. - UTs can assert stage-level fields directly instead of inferring behavior indirectly. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI passed. --------- Signed-off-by: linfeng-yuan <1102311262@qq.com> 2026-03-20 23:23:57 +08:00			`from unittest.mock import patch`
[UT]Ut for function cumsum_group_list in moe_mlp (ref #5025) (#5036) ### What this PR does / why we need it? Add a UT for the cumsum_group_list function, which is related to the precision issues stemming from moe_mlp.py. The related PR is https://github.com/vllm-project/vllm-ascend/pull/5025 ### Does this PR introduce _any_ user-facing change? No - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 --------- Signed-off-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com> Co-authored-by: weijinqian0 <1184188277@qq.com> 2025-12-18 15:00:16 +08:00
			`import torch`

[refactor] replace scattered business kwargs with typed request objects and explicit stage boundaries (#7024) ### What this PR does / why we need it? Refactor `vllm_ascend/ops/fused_moe` to replace scattered MoE business `**kwargs` with typed request objects and explicit stage boundaries. - Prepare, dispatch, MLP, and quant stages now have clearer ownership. - Main MoE path no longer depends on business `kwargs.get(...)` lookups. - Comm and dispatcher interfaces are request-only on the main path. - UTs can assert stage-level fields directly instead of inferring behavior indirectly. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI passed. --------- Signed-off-by: linfeng-yuan <1102311262@qq.com> 2026-03-20 23:23:57 +08:00			`from vllm_ascend.ops.fused_moe.moe_mlp import cumsum_group_list, unified_apply_mlp`
			`from vllm_ascend.ops.fused_moe.moe_runtime_args import (`
			`MoEMlpComputeInput,`
			`MoEQuantParams,`
			`MoEWeights,`
			`)`
			`from vllm_ascend.ops.fused_moe.moe_stage_params import MoEMxfpParams`
			`from vllm_ascend.quantization.quant_type import QuantType`
[UT]Ut for function cumsum_group_list in moe_mlp (ref #5025) (#5036) ### What this PR does / why we need it? Add ut for the cumsum_group_list function, which is related to the precision issues stemming from the moe_mlp.py . The ralated PR is https://github.com/vllm-project/vllm-ascend/pull/5025 ### Does this PR introduce _any_ user-facing change? No - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 --------- Signed-off-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com> Co-authored-by: weijinqian0 <1184188277@qq.com> 2025-12-18 15:00:16 +08:00

			`class TestCumsumGroupList(unittest.TestCase):`
			`glist_dict: ClassVar[dict[int, torch.Tensor]]`

			`@classmethod`
			`def setUpClass(cls):`
			`cls.glist_dict = {`
			`0: torch.tensor([0, 2, 3, 3]),`
			`1: torch.tensor([0, 2, 1, 0]),`
[refactor] replace scattered business kwargs with typed request objects and explicit stage boundaries (#7024) ### What this PR does / why we need it? Refactor `vllm_ascend/ops/fused_moe` to replace scattered MoE business `**kwargs` with typed request objects and explicit stage boundaries. - Prepare, dispatch, MLP, and quant stages now have clearer ownership. - Main MoE path no longer depends on business `kwargs.get(...)` lookups. - Comm and dispatcher interfaces are request-only on the main path. - UTs can assert stage-level fields directly instead of inferring behavior indirectly. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI passed. --------- Signed-off-by: linfeng-yuan <1102311262@qq.com> 2026-03-20 23:23:57 +08:00			`2: torch.tensor([[1, 2], [2, 1], [0, 0], [0, 0]]),`
[UT]Ut for function cumsum_group_list in moe_mlp (ref #5025) (#5036) ### What this PR does / why we need it? Add ut for the cumsum_group_list function, which is related to the precision issues stemming from the moe_mlp.py . The ralated PR is https://github.com/vllm-project/vllm-ascend/pull/5025 ### Does this PR introduce _any_ user-facing change? No - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 --------- Signed-off-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com> Co-authored-by: weijinqian0 <1184188277@qq.com> 2025-12-18 15:00:16 +08:00			`}`

			`support_combine = [(0, 0), (1, 0), (0, 1)]`
[CI]Fixed the spell check function in `typos.toml` (#6753) ### What this PR does / why we need it? The incorrect regular expression syntax `.[UE4M3\|ue4m3].` actually ignores all words containing any of the following characters: `u, e, 4, m, 3, \|` ```yaml extend-ignore-identifiers-re = [".Unc.", "._thw", ".UE8M0.", ".[UE4M3\|ue4m3].", ".eles.", ".fo.", ".ba.", ".ot.", ".[Tt]h[rR]."] ``` ===fix===> ```yaml extend-ignore-identifiers-re = [".Unc.", "._thw", ".UE8M0.", ".(UE4M3\|ue4m3]).", ".eles.", ".fo.", ".ba.", ".ot.", ".[Tt]h[rR]."] ``` ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 Signed-off-by: MrZ20 <2609716663@qq.com> 2026-02-14 11:57:26 +08:00			`unsupported_combine = [(0, 2), (2, 1), (1, 2)]`
[UT]Ut for function cumsum_group_list in moe_mlp (ref #5025) (#5036) ### What this PR does / why we need it? Add ut for the cumsum_group_list function, which is related to the precision issues stemming from the moe_mlp.py . The ralated PR is https://github.com/vllm-project/vllm-ascend/pull/5025 ### Does this PR introduce _any_ user-facing change? No - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 --------- Signed-off-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com> Co-authored-by: weijinqian0 <1184188277@qq.com> 2025-12-18 15:00:16 +08:00
			`def test_cumsum_group_list_supported_conversion(self):`
			`for src_list_type, dst_list_type in self.support_combine:`
			`with self.subTest(src=src_list_type, dst=dst_list_type):`
[refactor] replace scattered business kwargs with typed request objects and explicit stage boundaries (#7024) ### What this PR does / why we need it? Refactor `vllm_ascend/ops/fused_moe` to replace scattered MoE business `**kwargs` with typed request objects and explicit stage boundaries. - Prepare, dispatch, MLP, and quant stages now have clearer ownership. - Main MoE path no longer depends on business `kwargs.get(...)` lookups. - Comm and dispatcher interfaces are request-only on the main path. - UTs can assert stage-level fields directly instead of inferring behavior indirectly. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI passed. --------- Signed-off-by: linfeng-yuan <1102311262@qq.com> 2026-03-20 23:23:57 +08:00			`result = cumsum_group_list(self.glist_dict[src_list_type], src_list_type, dst_list_type, expert_num=4)`
			`self.assertTrue(torch.equal(result, self.glist_dict[dst_list_type]))`
[UT]Ut for function cumsum_group_list in moe_mlp (ref #5025) (#5036) ### What this PR does / why we need it? Add ut for the cumsum_group_list function, which is related to the precision issues stemming from the moe_mlp.py . The ralated PR is https://github.com/vllm-project/vllm-ascend/pull/5025 ### Does this PR introduce _any_ user-facing change? No - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 --------- Signed-off-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com> Co-authored-by: weijinqian0 <1184188277@qq.com> 2025-12-18 15:00:16 +08:00
			`def test_cumsum_group_list_invalid_type_valueerror(self):`
			`with self.assertRaises(ValueError) as excinfo:`
			`cumsum_group_list(self.glist_dict[0], 4, 0)`
[refactor] replace scattered business kwargs with typed request objects and explicit stage boundaries (#7024) ### What this PR does / why we need it? Refactor `vllm_ascend/ops/fused_moe` to replace scattered MoE business `**kwargs` with typed request objects and explicit stage boundaries. - Prepare, dispatch, MLP, and quant stages now have clearer ownership. - Main MoE path no longer depends on business `kwargs.get(...)` lookups. - Comm and dispatcher interfaces are request-only on the main path. - UTs can assert stage-level fields directly instead of inferring behavior indirectly. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI passed. --------- Signed-off-by: linfeng-yuan <1102311262@qq.com> 2026-03-20 23:23:57 +08:00			`self.assertIn("group_list_type should be in [0, 1, 2], but received", str(excinfo.exception))`
[UT]Ut for function cumsum_group_list in moe_mlp (ref #5025) (#5036) ### What this PR does / why we need it? Add ut for the cumsum_group_list function, which is related to the precision issues stemming from the moe_mlp.py . The ralated PR is https://github.com/vllm-project/vllm-ascend/pull/5025 ### Does this PR introduce _any_ user-facing change? No - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 --------- Signed-off-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com> Co-authored-by: weijinqian0 <1184188277@qq.com> 2025-12-18 15:00:16 +08:00
[refactor] replace scattered business kwargs with typed request objects and explicit stage boundaries (#7024) ### What this PR does / why we need it? Refactor `vllm_ascend/ops/fused_moe` to replace scattered MoE business `**kwargs` with typed request objects and explicit stage boundaries. - Prepare, dispatch, MLP, and quant stages now have clearer ownership. - Main MoE path no longer depends on business `kwargs.get(...)` lookups. - Comm and dispatcher interfaces are request-only on the main path. - UTs can assert stage-level fields directly instead of inferring behavior indirectly. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI passed. --------- Signed-off-by: linfeng-yuan <1102311262@qq.com> 2026-03-20 23:23:57 +08:00			`def test_cumsum_group_list_unsupported_conversion_notimplementederror(self):`
[CI]Fixed the spell check function in `typos.toml` (#6753) ### What this PR does / why we need it? The incorrect regular expression syntax `.[UE4M3\|ue4m3].` actually ignores all words containing any of the following characters: `u, e, 4, m, 3, \|` ```yaml extend-ignore-identifiers-re = [".Unc.", "._thw", ".UE8M0.", ".[UE4M3\|ue4m3].", ".eles.", ".fo.", ".ba.", ".ot.", ".[Tt]h[rR]."] ``` ===fix===> ```yaml extend-ignore-identifiers-re = [".Unc.", "._thw", ".UE8M0.", ".(UE4M3\|ue4m3]).", ".eles.", ".fo.", ".ba.", ".ot.", ".[Tt]h[rR]."] ``` ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main: https://github.com/vllm-project/vllm/commit/9562912cead1f11e8540fb91306c5cbda66f0007 Signed-off-by: MrZ20 <2609716663@qq.com> 2026-02-14 11:57:26 +08:00			`for src_list_type, dst_list_type in self.unsupported_combine:`
[UT]Ut for function cumsum_group_list in moe_mlp (ref #5025) (#5036) ### What this PR does / why we need it? Add ut for the cumsum_group_list function, which is related to the precision issues stemming from the moe_mlp.py . The ralated PR is https://github.com/vllm-project/vllm-ascend/pull/5025 ### Does this PR introduce _any_ user-facing change? No - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 --------- Signed-off-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com> Co-authored-by: weijinqian0 <1184188277@qq.com> 2025-12-18 15:00:16 +08:00			`with self.subTest(src=src_list_type, dst=dst_list_type):`
			`with self.assertRaises(NotImplementedError) as excinfo:`
[refactor] replace scattered business kwargs with typed request objects and explicit stage boundaries (#7024) ### What this PR does / why we need it? Refactor `vllm_ascend/ops/fused_moe` to replace scattered MoE business `**kwargs` with typed request objects and explicit stage boundaries. - Prepare, dispatch, MLP, and quant stages now have clearer ownership. - Main MoE path no longer depends on business `kwargs.get(...)` lookups. - Comm and dispatcher interfaces are request-only on the main path. - UTs can assert stage-level fields directly instead of inferring behavior indirectly. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI passed. --------- Signed-off-by: linfeng-yuan <1102311262@qq.com> 2026-03-20 23:23:57 +08:00			`cumsum_group_list(self.glist_dict[0], src_list_type, dst_list_type)`
			`self.assertIn("This feature is under development.", str(excinfo.exception))`
[UT]Ut for function cumsum_group_list in moe_mlp (ref #5025) (#5036) ### What this PR does / why we need it? Add ut for the cumsum_group_list function, which is related to the precision issues stemming from the moe_mlp.py . The ralated PR is https://github.com/vllm-project/vllm-ascend/pull/5025 ### Does this PR introduce _any_ user-facing change? No - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 --------- Signed-off-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com> Co-authored-by: weijinqian0 <1184188277@qq.com> 2025-12-18 15:00:16 +08:00

[refactor] replace scattered business kwargs with typed request objects and explicit stage boundaries (#7024) ### What this PR does / why we need it? Refactor `vllm_ascend/ops/fused_moe` to replace scattered MoE business `**kwargs` with typed request objects and explicit stage boundaries. - Prepare, dispatch, MLP, and quant stages now have clearer ownership. - Main MoE path no longer depends on business `kwargs.get(...)` lookups. - Comm and dispatcher interfaces are request-only on the main path. - UTs can assert stage-level fields directly instead of inferring behavior indirectly. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI passed. --------- Signed-off-by: linfeng-yuan <1102311262@qq.com> 2026-03-20 23:23:57 +08:00			`class TestUnifiedApplyMlpRequest(unittest.TestCase):`
			`def test_request_unquant_path(self):`
			`hidden_states = torch.randn(2, 8)`
			`expected = torch.randn(2, 8)`
			`mlp_compute_input = MoEMlpComputeInput(`
			`hidden_states=hidden_states,`
			`group_list=torch.tensor([2, 2], dtype=torch.int64),`
			`group_list_type=1,`
			`dynamic_scale=None,`
			`topk_scales=None,`
			`weights=MoEWeights(`
			`w1=torch.randn(1, 16, 8),`
			`w2=torch.randn(1, 8, 8),`
			`w1_bias=torch.randn(1, 16),`
			`w2_bias=torch.randn(1, 8),`
			`),`
			`quant=MoEQuantParams(quant_type=QuantType.NONE),`
			`fusion=False,`
			`activation="silu",`
			`need_trans=False,`
			`dynamic_eplb=False,`
			`)`

			`with (`
			`patch("vllm_ascend.ops.fused_moe.moe_mlp.unquant_apply_mlp", return_value=expected) as mock_unquant,`
			`patch("vllm_ascend.ops.fused_moe.moe_mlp.quant_apply_mlp") as mock_quant,`
			`):`
			`output = unified_apply_mlp(mlp_compute_input=mlp_compute_input)`

			`self.assertTrue(output is expected)`
			`mock_unquant.assert_called_once()`
			`self.assertEqual(mock_unquant.call_args.kwargs["activation"], "silu")`
			`self.assertFalse(mock_unquant.call_args.kwargs["need_trans"])`
			`mock_quant.assert_not_called()`

			`def test_request_quant_path(self):`
			`hidden_states = torch.randn(2, 8)`
			`expected = torch.randn(2, 8)`
			`mlp_compute_input = MoEMlpComputeInput(`
			`hidden_states=hidden_states,`
			`group_list=torch.tensor([2, 2], dtype=torch.int64),`
			`group_list_type=1,`
			`dynamic_scale=torch.randn(2, 1),`
			`topk_scales=None,`
			`weights=MoEWeights(`
			`w1=torch.randn(1, 16, 8),`
			`w2=torch.randn(1, 8, 8),`
			`w1_scale=[torch.randn(1)],`
			`w2_scale=[torch.randn(1)],`
			`),`
			`quant=MoEQuantParams(`
			`quant_type=QuantType.MXFP8,`
			`mxfp=MoEMxfpParams(`
			`act_quant_type=torch.float8_e4m3fn,`
			`weight_quant_type=torch.float8_e4m3fn,`
			`use_bf16=False,`
			`),`
			`),`
			`fusion=True,`
			`activation="silu",`
			`need_trans=False,`
			`dynamic_eplb=True,`
			`)`

			`with (`
			`patch("vllm_ascend.ops.fused_moe.moe_mlp.quant_apply_mlp", return_value=expected) as mock_quant,`
			`patch("vllm_ascend.ops.fused_moe.moe_mlp.unquant_apply_mlp") as mock_unquant,`
			`):`
			`output = unified_apply_mlp(mlp_compute_input=mlp_compute_input)`

			`self.assertTrue(output is expected)`
			`mock_quant.assert_called_once()`
			`quant_kwargs = mock_quant.call_args.kwargs`
			`self.assertTrue(quant_kwargs["use_mxfp_quant"])`
			`self.assertTrue(quant_kwargs["fusion"])`
			`self.assertTrue(quant_kwargs["dynamic_eplb"])`
			`self.assertFalse(quant_kwargs["use_bf16"])`
			`mock_unquant.assert_not_called()`


			`if __name__ == "__main__":`
[UT]Ut for function cumsum_group_list in moe_mlp (ref #5025) (#5036) ### What this PR does / why we need it? Add ut for the cumsum_group_list function, which is related to the precision issues stemming from the moe_mlp.py . The ralated PR is https://github.com/vllm-project/vllm-ascend/pull/5025 ### Does this PR introduce _any_ user-facing change? No - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 --------- Signed-off-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: tanqingshan (A) <50050625@china.huawei.com> Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com> Co-authored-by: weijinqian0 <1184188277@qq.com> 2025-12-18 15:00:16 +08:00			`unittest.main(verbosity=2)`