From 29fb27d3bb7a467690ec5d59b2fa857f994830c8 Mon Sep 17 00:00:00 2001
From: Mercykid-bash
Date: Mon, 26 Jan 2026 17:18:46 +0800
Subject: [PATCH] BugFix: Fix moe_load accumulation error in ACL graph mode
 (#6182)

This PR fixes a numerical error in moe_load accumulation under ACL graph
mode on NPU: using `+=` on NPU tensors in graph mode raises no error but
silently produces incorrect values, so we replace it with the in-place
`add_()` method to ensure correct accumulation.

Signed-off-by: Mercykid-bash
---
 vllm_ascend/ops/fused_moe/fused_moe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm_ascend/ops/fused_moe/fused_moe.py b/vllm_ascend/ops/fused_moe/fused_moe.py
index f94871fc..12d6a63e 100644
--- a/vllm_ascend/ops/fused_moe/fused_moe.py
+++ b/vllm_ascend/ops/fused_moe/fused_moe.py
@@ -382,9 +382,9 @@ class AscendFusedMoE(FusedMoE):
         group_list_type = fused_experts_results.group_list_type
         assert expert_tokens is not None and group_list_type is not None, \
             "expert_tokens and group_list_type should not be None when dynamic_eplb is enabled."
-        self.moe_load += expert_tokens if group_list_type == 1 else \
+        local_load = expert_tokens if group_list_type == 1 else \
             torch.cat([expert_tokens[:1], expert_tokens[1:] - expert_tokens[:-1]])
-
+        self.moe_load.add_(local_load)
         routed_out = forward_context.moe_comm_method.finalize(
             hidden_states=fused_experts_results.routed_out,
             reduce_results=self.reduce_results,
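
Note for reviewers: below is a minimal CPU-side sketch of the corrected
accumulation, runnable in eager mode. The MoeLoadTracker class and the
num_experts value are illustrative stand-ins, not part of this patch.

    import torch

    class MoeLoadTracker:
        """Illustrative stand-in for the moe_load bookkeeping in AscendFusedMoE."""

        def __init__(self, num_experts: int):
            # Persistent per-expert token counter. In ACL graph mode this
            # buffer must be mutated in place so graph replays see fresh values.
            self.moe_load = torch.zeros(num_experts, dtype=torch.int64)

        def accumulate(self, expert_tokens: torch.Tensor,
                       group_list_type: int) -> None:
            # group_list_type == 1: expert_tokens already holds per-expert
            # counts; otherwise it is a cumulative sum, so difference it
            # back into per-expert counts.
            local_load = expert_tokens if group_list_type == 1 else \
                torch.cat([expert_tokens[:1],
                           expert_tokens[1:] - expert_tokens[:-1]])
            # add_() updates the existing storage in place; per this patch,
            # `+=` on NPU tensors under ACL graph capture raises no error
            # but accumulates incorrect values.
            self.moe_load.add_(local_load)

    tracker = MoeLoadTracker(num_experts=4)
    tracker.accumulate(torch.tensor([1, 3, 6, 10]), group_list_type=0)  # cumsum
    tracker.accumulate(torch.tensor([2, 2, 2, 2]), group_list_type=1)   # counts
    print(tracker.moe_load)  # tensor([3, 4, 5, 6])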