From ef8157a5f256be01d7a602e6ae58817761be68d9 Mon Sep 17 00:00:00 2001
From: AlvisGong
Date: Thu, 18 Dec 2025 15:07:40 +0800
Subject: [PATCH] fixed fused alltoall execute all reduce (#5109)

### What this PR does / why we need it?
Fix the shared-expert all-reduce for the fused all-to-all path: when
`moe_comm_type` is `MoECommType.FUSED_ALLTOALL`, the shared-expert output must
also be all-reduced across the tensor-parallel group, just as it already is
for `ALLTOALL` and `MC2`:

    if moe_comm_type in {MoECommType.ALLTOALL, MoECommType.MC2, MoECommType.FUSED_ALLTOALL} \
            and not shared_expert_dp_enabled():
        shared_out = tensor_model_parallel_all_reduce(shared_out)

- vLLM version: v0.12.0
- vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9

Signed-off-by: AlvisGong
Co-authored-by: Jade Zheng
---
 vllm_ascend/ops/fused_moe/fused_moe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm_ascend/ops/fused_moe/fused_moe.py b/vllm_ascend/ops/fused_moe/fused_moe.py
index f4396992..9adfe085 100644
--- a/vllm_ascend/ops/fused_moe/fused_moe.py
+++ b/vllm_ascend/ops/fused_moe/fused_moe.py
@@ -533,7 +533,7 @@ class AscendSharedFusedMoE(SharedFusedMoE, AscendFusedMoE):
         # NOTE: This is exactly the opposite of `maybe_all_reduce_tensor_model_parallel`
         forward_context = get_forward_context()
         moe_comm_type = forward_context.moe_comm_type
-        if moe_comm_type in {MoECommType.ALLTOALL, MoECommType.MC2} \
+        if moe_comm_type in {MoECommType.ALLTOALL, MoECommType.MC2, MoECommType.FUSED_ALLTOALL} \
                 and not shared_expert_dp_enabled():
             shared_out = tensor_model_parallel_all_reduce(shared_out)
         else:
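
Reviewer note: below is a minimal standalone sketch of the branch logic this
patch changes, for context only. The function name
`reduce_shared_expert_output`, the `ALLGATHER` member, and the stubbed
collectives are illustrative assumptions, not code from
`vllm_ascend/ops/fused_moe/fused_moe.py`.

```python
# Hypothetical sketch of the changed branch; names and stubs are assumptions.
from enum import Enum, auto


class MoECommType(Enum):
    ALLGATHER = auto()       # illustrative member for the non-all-reduce path
    ALLTOALL = auto()
    MC2 = auto()
    FUSED_ALLTOALL = auto()  # newly included in the all-reduce branch by this patch


def reduce_shared_expert_output(shared_out, moe_comm_type,
                                shared_expert_dp_enabled,
                                tensor_model_parallel_all_reduce):
    """Return the shared-expert output, all-reduced across TP when required."""
    # For ALLTOALL, MC2, and (after this fix) FUSED_ALLTOALL, each TP rank
    # presumably holds only a partial shared-expert result, so it is
    # all-reduced unless shared-expert data parallelism is enabled.
    if moe_comm_type in {MoECommType.ALLTOALL, MoECommType.MC2,
                         MoECommType.FUSED_ALLTOALL} \
            and not shared_expert_dp_enabled():
        return tensor_model_parallel_all_reduce(shared_out)
    # Other comm types fall through to the `else:` branch of the real code,
    # which handles the output differently (not reproduced here).
    return shared_out


if __name__ == "__main__":
    # Toy single-process check with stubbed collectives.
    out = reduce_shared_expert_output(
        shared_out=[1.0, 2.0],
        moe_comm_type=MoECommType.FUSED_ALLTOALL,
        shared_expert_dp_enabled=lambda: False,
        tensor_model_parallel_all_reduce=lambda x: x,  # identity stub
    )
    print(out)  # FUSED_ALLTOALL now takes the all-reduce branch
```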