Reorder loop in shared expert weight loading (#5719)
This commit is contained in:
@@ -215,11 +215,11 @@ class DeepseekV3ForCausalLMNextN(DeepseekV3ForCausalLM):
|
||||
"up_proj.weight_scale_inv",
|
||||
]
|
||||
names_to_remove = []
|
||||
for num_repeat in range(self.n_share_experts_fusion):
|
||||
for suffix in suffix_list:
|
||||
shared_expert_weight_name = (
|
||||
f"model.layers.0.mlp.shared_experts.{suffix}"
|
||||
)
|
||||
for suffix in suffix_list:
|
||||
shared_expert_weight_name = (
|
||||
f"model.layers.0.mlp.shared_experts.{suffix}"
|
||||
)
|
||||
for num_repeat in range(self.n_share_experts_fusion):
|
||||
weights_list.append(
|
||||
(
|
||||
f"model.layers.0."
|
||||
@@ -229,7 +229,7 @@ class DeepseekV3ForCausalLMNextN(DeepseekV3ForCausalLM):
|
||||
weights_dict[shared_expert_weight_name],
|
||||
)
|
||||
)
|
||||
names_to_remove += [shared_expert_weight_name]
|
||||
names_to_remove += [shared_expert_weight_name]
|
||||
weights = [w for w in weights_list if w[0] not in names_to_remove]
|
||||
|
||||
# Params for weights, fp8 weight scales, fp8 activation scales
|
||||
|
||||
@@ -1650,11 +1650,11 @@ class DeepseekV2ForCausalLM(nn.Module):
|
||||
desc=f"Cloning {self.n_share_experts_fusion} "
|
||||
"replicas of the shared expert into MoE",
|
||||
):
|
||||
for num_repeat in range(self.n_share_experts_fusion):
|
||||
for suffix in suffix_list:
|
||||
shared_expert_weight_name = (
|
||||
f"model.layers.{moe_layer}.mlp.shared_experts.{suffix}"
|
||||
)
|
||||
for suffix in suffix_list:
|
||||
shared_expert_weight_name = (
|
||||
f"model.layers.{moe_layer}.mlp.shared_experts.{suffix}"
|
||||
)
|
||||
for num_repeat in range(self.n_share_experts_fusion):
|
||||
weights_list.append(
|
||||
(
|
||||
f"model.layers.{moe_layer}."
|
||||
@@ -1664,7 +1664,7 @@ class DeepseekV2ForCausalLM(nn.Module):
|
||||
weights_dict[shared_expert_weight_name],
|
||||
)
|
||||
)
|
||||
names_to_remove += [shared_expert_weight_name]
|
||||
names_to_remove += [shared_expert_weight_name]
|
||||
weights = [w for w in weights_list if w[0] not in names_to_remove]
|
||||
|
||||
# Params for weights, fp8 weight scales, fp8 activation scales
|
||||
|
||||
Reference in New Issue
Block a user