From ef8157a5f256be01d7a602e6ae58817761be68d9 Mon Sep 17 00:00:00 2001
From: AlvisGong
Date: Thu, 18 Dec 2025 15:07:40 +0800
Subject: [PATCH] fixed fused alltoall execute all reduce (#5109)

### What this PR does / why we need it?
Fix the shared-expert all-reduce for the fused all-to-all path: when
`moe_comm_type` is `MoECommType.FUSED_ALLTOALL`, the shared-expert output must
also be all-reduced across the tensor-parallel group, just as it already is
for `ALLTOALL` and `MC2`:

    if moe_comm_type in {MoECommType.ALLTOALL, MoECommType.MC2, MoECommType.FUSED_ALLTOALL} \
            and not shared_expert_dp_enabled():
        shared_out = tensor_model_parallel_all_reduce(shared_out)

- vLLM version: v0.12.0
- vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9

Signed-off-by: AlvisGong
Co-authored-by: Jade Zheng
---
 vllm_ascend/ops/fused_moe/fused_moe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm_ascend/ops/fused_moe/fused_moe.py b/vllm_ascend/ops/fused_moe/fused_moe.py
index f4396992..9adfe085 100644
--- a/vllm_ascend/ops/fused_moe/fused_moe.py
+++ b/vllm_ascend/ops/fused_moe/fused_moe.py
@@ -533,7 +533,7 @@ class AscendSharedFusedMoE(SharedFusedMoE, AscendFusedMoE):
         # NOTE: This is exactly the opposite of `maybe_all_reduce_tensor_model_parallel`
         forward_context = get_forward_context()
         moe_comm_type = forward_context.moe_comm_type
-        if moe_comm_type in {MoECommType.ALLTOALL, MoECommType.MC2} \
+        if moe_comm_type in {MoECommType.ALLTOALL, MoECommType.MC2, MoECommType.FUSED_ALLTOALL} \
                 and not shared_expert_dp_enabled():
             shared_out = tensor_model_parallel_all_reduce(shared_out)
         else:
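
Reviewer note: below is a minimal standalone sketch of the branch logic this
patch changes, for context only. The function name
`reduce_shared_expert_output`, the `ALLGATHER` member, and the stubbed
collectives are illustrative assumptions, not code from
`vllm_ascend/ops/fused_moe/fused_moe.py`.

```python
# Hypothetical sketch of the changed branch; names and stubs are assumptions.
from enum import Enum, auto


class MoECommType(Enum):
    ALLGATHER = auto()       # illustrative member for the non-all-reduce path
    ALLTOALL = auto()
    MC2 = auto()
    FUSED_ALLTOALL = auto()  # newly included in the all-reduce branch by this patch


def reduce_shared_expert_output(shared_out, moe_comm_type,
                                shared_expert_dp_enabled,
                                tensor_model_parallel_all_reduce):
    """Return the shared-expert output, all-reduced across TP when required."""
    # For ALLTOALL, MC2, and (after this fix) FUSED_ALLTOALL, each TP rank
    # presumably holds only a partial shared-expert result, so it is
    # all-reduced unless shared-expert data parallelism is enabled.
    if moe_comm_type in {MoECommType.ALLTOALL, MoECommType.MC2,
                         MoECommType.FUSED_ALLTOALL} \
            and not shared_expert_dp_enabled():
        return tensor_model_parallel_all_reduce(shared_out)
    # Other comm types fall through to the `else:` branch of the real code,
    # which handles the output differently (not reproduced here).
    return shared_out


if __name__ == "__main__":
    # Toy single-process check with stubbed collectives.
    out = reduce_shared_expert_output(
        shared_out=[1.0, 2.0],
        moe_comm_type=MoECommType.FUSED_ALLTOALL,
        shared_expert_dp_enabled=lambda: False,
        tensor_model_parallel_all_reduce=lambda x: x,  # identity stub
    )
    print(out)  # FUSED_ALLTOALL now takes the all-reduce branch
```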