Fix fused alltoall to execute all-reduce (#5109)

### What this PR does / why we need it?
Fix the shared-expert all-reduce so it is also executed when `moe_comm_type` is
`MoECommType.FUSED_ALLTOALL`. The condition becomes:

```python
if moe_comm_type in {MoECommType.ALLTOALL, MoECommType.MC2,
                     MoECommType.FUSED_ALLTOALL} \
        and not shared_expert_dp_enabled():
    shared_out = tensor_model_parallel_all_reduce(shared_out)
```
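
For context, here is a minimal, self-contained sketch of the corrected branch. The `MoECommType` enum and the helpers `shared_expert_dp_enabled` / `tensor_model_parallel_all_reduce` are stub stand-ins for the real vLLM / vllm-ascend symbols, and `maybe_reduce_shared_out` is a hypothetical wrapper added only for illustration; it is not the actual implementation.

```python
# Sketch only: stubs stand in for vLLM / vllm-ascend internals.
from enum import Enum, auto


class MoECommType(Enum):
    ALLGATHER = auto()
    ALLTOALL = auto()
    MC2 = auto()
    FUSED_ALLTOALL = auto()


def shared_expert_dp_enabled() -> bool:
    # Hypothetical stub: in the real code this reflects the parallel config.
    return False


def tensor_model_parallel_all_reduce(tensor):
    # Hypothetical stub: the real helper reduces across tensor-parallel ranks.
    return tensor


def maybe_reduce_shared_out(shared_out, moe_comm_type: MoECommType):
    # After this PR, FUSED_ALLTOALL is handled like ALLTOALL and MC2:
    # the shared-expert output is all-reduced across tensor-parallel ranks
    # unless shared-expert data parallelism is enabled.
    if moe_comm_type in {MoECommType.ALLTOALL, MoECommType.MC2,
                         MoECommType.FUSED_ALLTOALL} \
            and not shared_expert_dp_enabled():
        shared_out = tensor_model_parallel_all_reduce(shared_out)
    return shared_out
```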


- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

Signed-off-by: AlvisGong <gwly0401@163.com>
Co-authored-by: Jade Zheng <zheng.shoujian@outlook.com>
Author: AlvisGong
Committed: 2025-12-18 15:07:40 +08:00 (via GitHub)
Parent: 78602eab4f
Commit: ef8157a5f2


```diff
@@ -533,7 +533,7 @@ class AscendSharedFusedMoE(SharedFusedMoE, AscendFusedMoE):
         # NOTE: This is exactly the opposite of `maybe_all_reduce_tensor_model_parallel`
         forward_context = get_forward_context()
         moe_comm_type = forward_context.moe_comm_type
-        if moe_comm_type in {MoECommType.ALLTOALL, MoECommType.MC2} \
+        if moe_comm_type in {MoECommType.ALLTOALL, MoECommType.MC2, MoECommType.FUSED_ALLTOALL} \
                 and not shared_expert_dp_enabled():
             shared_out = tensor_model_parallel_all_reduce(shared_out)
         else:
```