Add gptq quantization model support (#141)

2024-02-07 03:35:04 +08:00
parent ccbe1e67d8
commit 3ae78a09b3
4 changed files with 25 additions and 14 deletions
--- a/python/sglang/srt/layers/radix_attention.py
+++ b/python/sglang/srt/layers/radix_attention.py
@@ -19,10 +19,9 @@ class RadixAttention(nn.Module):
        head_dim,
        scaling,
        num_kv_heads,
-        layer_id,
+        layer_id
    ):
        super().__init__()
-
        self.tp_q_head_num = num_heads
        self.tp_k_head_num = num_kv_heads
        self.tp_v_head_num = num_kv_heads