From 1b1b47a9491a24d95e07f8440c879fb281c30688 Mon Sep 17 00:00:00 2001 From: lambert0312 Date: Sat, 12 Apr 2025 14:33:51 +0800 Subject: [PATCH] Fix w8a8_int8 model shared experts fusion load weights error (#5120) --- python/sglang/srt/models/deepseek_v2.py | 26 +++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 6a960a371..42ae9d293 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -1480,14 +1480,24 @@ class DeepseekV2ForCausalLM(nn.Module): if self.n_share_experts_fusion is not None and self.n_share_experts_fusion > 0: weights_list = list(weights) weights_dict = dict(weights_list) - suffix_list = [ - "down_proj.weight", - "down_proj.weight_scale_inv", - "gate_proj.weight", - "gate_proj.weight_scale_inv", - "up_proj.weight", - "up_proj.weight_scale_inv", - ] + if self.quant_config is not None and self.quant_config.get_name() == "w8a8_int8": + suffix_list = [ + "down_proj.weight", + "down_proj.weight_scale", + "gate_proj.weight", + "gate_proj.weight_scale", + "up_proj.weight", + "up_proj.weight_scale", + ] + else: + suffix_list = [ + "down_proj.weight", + "down_proj.weight_scale_inv", + "gate_proj.weight", + "gate_proj.weight_scale_inv", + "up_proj.weight", + "up_proj.weight_scale_inv", + ] names_to_remove = [] for moe_layer in tqdm( range(