BugFix: Fix moe_load accumulation error in ACL graph mode (#6182)
This PR fixes a numerical error in moe_load accumulation under ACL graph mode on NPU: using += on NPU tensors captured in graph mode does not raise an error but silently produces incorrect values, so the accumulation is replaced with the in-place add_() method to ensure correct results.

Signed-off-by: Mercykid-bash <ruanche0218@gmail.com>
@@ -382,9 +382,9 @@ class AscendFusedMoE(FusedMoE):
             group_list_type = fused_experts_results.group_list_type
             assert expert_tokens is not None and group_list_type is not None, \
                 "expert_tokens and group_list_type should not be None when dynamic_eplb is enabled."
-            self.moe_load += expert_tokens if group_list_type == 1 else \
+            local_load = expert_tokens if group_list_type == 1 else \
                 torch.cat([expert_tokens[:1], expert_tokens[1:] - expert_tokens[:-1]])
+            self.moe_load.add_(local_load)
 
         routed_out = forward_context.moe_comm_method.finalize(
             hidden_states=fused_experts_results.routed_out,
             reduce_results=self.reduce_results,
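For context, a minimal self-contained sketch of the pattern this diff changes. MoELoadTracker, update, and num_experts are hypothetical names introduced for illustration; only moe_load, expert_tokens, group_list_type, and the two accumulation expressions come from the diff itself.

import torch

class MoELoadTracker:
    """Simplified stand-in for the load-tracking state in AscendFusedMoE."""

    def __init__(self, num_experts: int) -> None:
        # Running per-expert token counts, accumulated across forward steps.
        self.moe_load = torch.zeros(num_experts, dtype=torch.int64)

    def update(self, expert_tokens: torch.Tensor, group_list_type: int) -> None:
        # group_list_type == 1: expert_tokens already holds per-expert counts;
        # otherwise it is a cumulative sum, so adjacent differences recover
        # the per-expert counts.
        local_load = expert_tokens if group_list_type == 1 else \
            torch.cat([expert_tokens[:1], expert_tokens[1:] - expert_tokens[:-1]])
        # In-place add_() rather than `self.moe_load += local_load`: per this
        # PR, += silently produced wrong values for NPU tensors under ACL
        # graph mode, while the explicit in-place op accumulates correctly.
        self.moe_load.add_(local_load)

# Usage: cumulative counts [3, 5, 9] become per-expert loads [3, 2, 4].
tracker = MoELoadTracker(num_experts=3)
tracker.update(torch.tensor([3, 5, 9]), group_list_type=0)

Splitting the ternary into a named local_load and accumulating with add_() keeps the computation itself unchanged; only the final write into moe_load is switched to an explicit in-place op, which is the part that misbehaved under graph capture.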