Speed up shared expert weight construction by avoiding cloning (#5188)
This commit is contained in:
@@ -1628,7 +1628,7 @@ class DeepseekV2ForCausalLM(nn.Module):
|
|||||||
f"mlp.experts."
|
f"mlp.experts."
|
||||||
f"{self.config.n_routed_experts + num_repeat}"
|
f"{self.config.n_routed_experts + num_repeat}"
|
||||||
f".{suffix}",
|
f".{suffix}",
|
||||||
weights_dict[shared_expert_weight_name].clone(),
|
weights_dict[shared_expert_weight_name],
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
names_to_remove += [shared_expert_weight_name]
|
names_to_remove += [shared_expert_weight_name]
|
||||||
|
|||||||
Reference in New Issue
Block a user