Fix an error that occurs when loading the Gemma model in bitsandbytes format. (#2557)

This commit is contained in:
Sangchun Ha (Patrick)
2024-12-26 22:10:37 +09:00
committed by GitHub
parent 60bd32723a
commit 08effbff35
2 changed files with 41 additions and 11 deletions

View File

@@ -307,6 +307,25 @@ class Gemma2Model(nn.Module):
class Gemma2ForCausalLM(nn.Module):
    # BitsAndBytes specific attributes

    # Names of the per-layer linear submodules that BitsAndBytes quantization
    # should target. Leading/trailing dots make these substring patterns match
    # the module path (e.g. "model.layers.0.mlp.gate_proj.") exactly at the
    # submodule name, avoiding accidental prefix/suffix matches.
    default_bitsandbytes_target_modules = [
        ".gate_proj.",
        ".down_proj.",
        ".up_proj.",
        ".q_proj.",
        ".k_proj.",
        ".v_proj.",
        ".o_proj.",
    ]
    # Maps each original (unstacked) checkpoint shard name to the fused
    # parameter it is packed into and its position within that fused tensor,
    # so the BitsAndBytes loader can locate individual shards inside the
    # stacked weights (q/k/v fused into qkv_proj; gate/up fused into
    # gate_up_proj).
    bitsandbytes_stacked_params_mapping = {
        # shard_name, weight_name, index
        "q_proj": ("qkv_proj", 0),
        "k_proj": ("qkv_proj", 1),
        "v_proj": ("qkv_proj", 2),
        "gate_proj": ("gate_up_proj", 0),
        "up_proj": ("gate_up_proj", 1),
    }
packed_modules_mapping = {
"qkv_proj": [
"q_proj",