Reorder loop in shared expert weight loading (#5719)

This commit is contained in:
Ke Bao
2025-04-26 08:27:42 +08:00
committed by GitHub
parent 269c457e05
commit c3948ba67e
2 changed files with 12 additions and 12 deletions

View File

@@ -215,11 +215,11 @@ class DeepseekV3ForCausalLMNextN(DeepseekV3ForCausalLM):
 "up_proj.weight_scale_inv",
 ]
 names_to_remove = []
-for num_repeat in range(self.n_share_experts_fusion):
 for suffix in suffix_list:
 shared_expert_weight_name = (
 f"model.layers.0.mlp.shared_experts.{suffix}"
 )
+for num_repeat in range(self.n_share_experts_fusion):
 weights_list.append(
 (
 f"model.layers.0."

View File

@@ -1650,11 +1650,11 @@ class DeepseekV2ForCausalLM(nn.Module):
 desc=f"Cloning {self.n_share_experts_fusion} "
 "replicas of the shared expert into MoE",
 ):
-for num_repeat in range(self.n_share_experts_fusion):
 for suffix in suffix_list:
 shared_expert_weight_name = (
 f"model.layers.{moe_layer}.mlp.shared_experts.{suffix}"
 )
+for num_repeat in range(self.n_share_experts_fusion):
 weights_list.append(
 (
 f"model.layers.{moe_layer}."