From a2f602b5418f76f64c4d762dff3c56cc856d484e Mon Sep 17 00:00:00 2001 From: Rin Intachuen <113603872+RinRin-32@users.noreply.github.com> Date: Thu, 16 Jan 2025 21:51:43 +0700 Subject: [PATCH] fixed lm_head.weight error for quantized qwen (#2910) --- python/sglang/srt/models/qwen2.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/sglang/srt/models/qwen2.py b/python/sglang/srt/models/qwen2.py index e42559bbc..bc3f10997 100644 --- a/python/sglang/srt/models/qwen2.py +++ b/python/sglang/srt/models/qwen2.py @@ -356,6 +356,8 @@ class Qwen2ForCausalLM(nn.Module): break else: # Skip loading extra bias for GPTQ models. + if "lm_head.weight" in name: + continue if name.endswith(".bias") and name not in params_dict: continue param = params_dict[name]