Add GPTQ quantization model support (#141)

This commit is contained in:
Arcmoon
2024-02-07 03:35:04 +08:00
committed by GitHub
parent ccbe1e67d8
commit 3ae78a09b3
4 changed files with 25 additions and 14 deletions

View File

@@ -19,10 +19,9 @@ class RadixAttention(nn.Module):
head_dim,
scaling,
num_kv_heads,
layer_id,
layer_id
):
super().__init__()
self.tp_q_head_num = num_heads
self.tp_k_head_num = num_kv_heads
self.tp_v_head_num = num_kv_heads