From a2f602b5418f76f64c4d762dff3c56cc856d484e Mon Sep 17 00:00:00 2001
From: Rin Intachuen <113603872+RinRin-32@users.noreply.github.com>
Date: Thu, 16 Jan 2025 21:51:43 +0700
Subject: [PATCH] fixed lm_head.weight error for quantized qwen (#2910)

---
 python/sglang/srt/models/qwen2.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/sglang/srt/models/qwen2.py b/python/sglang/srt/models/qwen2.py
index e42559bbc..bc3f10997 100644
--- a/python/sglang/srt/models/qwen2.py
+++ b/python/sglang/srt/models/qwen2.py
@@ -356,6 +356,8 @@ class Qwen2ForCausalLM(nn.Module):
                 break
             else:
                 # Skip loading extra bias for GPTQ models.
+                if "lm_head.weight" in name:
+                    continue
                 if name.endswith(".bias") and name not in params_dict:
                     continue
                 param = params_dict[name]