From b57d87c297f6928851b8830389300f6dbe64701c Mon Sep 17 00:00:00 2001
From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
Date: Sat, 14 Jun 2025 17:35:18 +0800
Subject: [PATCH] Fix shared experts fusion + weight requant (#7177)

---
 python/sglang/srt/models/deepseek_v2.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py
index ee86901aa..869756568 100644
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -1960,14 +1960,15 @@ class DeepseekV2ForCausalLM(nn.Module):
                 )
 
             if layer_id in moe_layers:
-                shared_experts = layer.mlp.shared_experts
-                for module in [
-                    shared_experts.gate_up_proj,
-                    shared_experts.down_proj,
-                ]:
-                    requant_weight_ue8m0_inplace(
-                        module.weight, module.weight_scale_inv, weight_block_size
-                    )
+                shared_experts = getattr(layer.mlp, "shared_experts", None)
+                if shared_experts is not None:
+                    for module in [
+                        shared_experts.gate_up_proj,
+                        shared_experts.down_proj,
+                    ]:
+                        requant_weight_ue8m0_inplace(
+                            module.weight, module.weight_scale_inv, weight_block_size
+                        )
 
                 experts = layer.mlp.experts
                 if isinstance(experts, DeepEPMoE):
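
Note on the fix (not part of the patch): when shared-experts fusion is enabled, the shared
experts are folded into the routed experts' weights and `layer.mlp` no longer exposes a
`shared_experts` submodule, so the previous unconditional attribute access would crash the
UE8M0 weight-requant pass with an AttributeError. The patch swaps it for
`getattr(layer.mlp, "shared_experts", None)` and skips the requant when the submodule is
absent. Below is a minimal, self-contained sketch of that guard; `fake_requant`,
`requant_shared_experts`, and the SimpleNamespace stand-ins are illustrative assumptions,
not sglang APIs -- only the getattr guard mirrors the actual change.

    # Sketch of the guard introduced by this patch. All names here are
    # illustrative stand-ins; only the getattr(..., None) pattern mirrors
    # the actual change in deepseek_v2.py.
    from types import SimpleNamespace

    import torch


    def fake_requant(weight, scale_inv, block_size):
        # Placeholder for requant_weight_ue8m0_inplace. UE8M0 scales are
        # powers of two, so we snap each block scale up to the next power
        # of two; the real code also rescales the weights to compensate.
        scale_inv.copy_(torch.exp2(torch.ceil(torch.log2(scale_inv))))


    def requant_shared_experts(mlp, weight_block_size=(128, 128)):
        # With shared-experts fusion enabled, `mlp` has no `shared_experts`
        # attribute; the old unconditional access raised AttributeError here.
        shared_experts = getattr(mlp, "shared_experts", None)
        if shared_experts is None:
            return  # fused into the routed experts; nothing separate to requant
        for module in (shared_experts.gate_up_proj, shared_experts.down_proj):
            fake_requant(module.weight, module.weight_scale_inv, weight_block_size)


    def _dummy_proj():
        return SimpleNamespace(
            weight=torch.randn(4, 4), weight_scale_inv=torch.rand(1, 1) + 0.5
        )


    # Unfused layer: shared experts exist and their scales get requantized.
    mlp_unfused = SimpleNamespace(
        shared_experts=SimpleNamespace(
            gate_up_proj=_dummy_proj(), down_proj=_dummy_proj()
        )
    )
    requant_shared_experts(mlp_unfused)

    # Fused layer: no shared_experts attribute; previously an AttributeError.
    mlp_fused = SimpleNamespace()
    requant_shared_experts(mlp_fused)
    print("both cases handled")

Using getattr with a None default keeps the requant pass a no-op for fused layers instead
of threading a fusion flag through to every call site.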