Fix shared experts fusion + weight requant (#7177)

This commit is contained in:
fzyzcjy
2025-06-14 17:35:18 +08:00
committed by GitHub
parent 98538822d5
commit b57d87c297

View File

@@ -1960,14 +1960,15 @@ class DeepseekV2ForCausalLM(nn.Module):
                 )
                 if layer_id in moe_layers:
-                    shared_experts = layer.mlp.shared_experts
-                    for module in [
-                        shared_experts.gate_up_proj,
-                        shared_experts.down_proj,
-                    ]:
-                        requant_weight_ue8m0_inplace(
-                            module.weight, module.weight_scale_inv, weight_block_size
-                        )
+                    shared_experts = getattr(layer.mlp, "shared_experts", None)
+                    if shared_experts is not None:
+                        for module in [
+                            shared_experts.gate_up_proj,
+                            shared_experts.down_proj,
+                        ]:
+                            requant_weight_ue8m0_inplace(
+                                module.weight, module.weight_scale_inv, weight_block_size
+                            )
                 experts = layer.mlp.experts
                 if isinstance(experts, DeepEPMoE):