Reduce MoE memory usage (#6147)

This commit is contained in:
fzyzcjy
2025-05-16 00:38:28 +08:00
committed by GitHub
parent cfc9f9ab8d
commit f194e14fb7
4 changed files with 75 additions and 40 deletions

View File

@@ -311,10 +311,10 @@ class DeepseekV2MoE(nn.Module):
shared_output = self._forward_shared_experts(hidden_states)
# router_logits: (num_tokens, n_experts)
router_logits = self.gate(hidden_states)
-        final_hidden_states = (
-            self.experts(hidden_states=hidden_states, router_logits=router_logits)
-            * self.routed_scaling_factor
-        )
+        final_hidden_states = self.experts(
+            hidden_states=hidden_states, router_logits=router_logits
+        )
+        final_hidden_states *= self.routed_scaling_factor
if shared_output is not None:
final_hidden_states = final_hidden_states + shared_output
if self.tp_size > 1: