Add GPTQ quantization model support (#141)
This commit is contained in:
@@ -19,10 +19,9 @@ class RadixAttention(nn.Module):
|
||||
head_dim,
|
||||
scaling,
|
||||
num_kv_heads,
|
||||
layer_id,
|
||||
layer_id
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
self.tp_q_head_num = num_heads
|
||||
self.tp_k_head_num = num_kv_heads
|
||||
self.tp_v_head_num = num_kv_heads
|
||||
|
||||
Reference in New Issue
Block a user