Fix weight-loading error for w8a8_int8 models with shared experts fusion (#5120)

2025-04-12 14:33:51 +08:00
parent 3c9740d200
commit 1b1b47a949
1 changed file with 18 additions and 8 deletions
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -1480,6 +1480,16 @@ class DeepseekV2ForCausalLM(nn.Module):
        if self.n_share_experts_fusion is not None and self.n_share_experts_fusion > 0:
            weights_list = list(weights)
            weights_dict = dict(weights_list)
            if self.quant_config.get_name() == "w8a8_int8":
                suffix_list = [
                    "down_proj.weight",
                    "down_proj.weight_scale",
                    "gate_proj.weight",
                    "gate_proj.weight_scale",
                    "up_proj.weight",
                    "up_proj.weight_scale",
                ]
            else:
                suffix_list = [
                    "down_proj.weight",
                    "down_proj.weight_scale_inv",