Reduce MoE memory usage (#6147)
This commit is contained in:
@@ -311,10 +311,10 @@ class DeepseekV2MoE(nn.Module):
         shared_output = self._forward_shared_experts(hidden_states)
         # router_logits: (num_tokens, n_experts)
         router_logits = self.gate(hidden_states)
-        final_hidden_states = (
-            self.experts(hidden_states=hidden_states, router_logits=router_logits)
-            * self.routed_scaling_factor
-        )
+        final_hidden_states = self.experts(
+            hidden_states=hidden_states, router_logits=router_logits
+        )
+        final_hidden_states *= self.routed_scaling_factor
         if shared_output is not None:
             final_hidden_states = final_hidden_states + shared_output
         if self.tp_size > 1:
||||
Reference in New Issue
Block a user