Update grok.py and tiktoken tokenizer (#9532)
This commit is contained in:
@@ -52,6 +52,8 @@ class RadixAttention(nn.Module):
|
||||
v_head_dim: int = -1,
|
||||
sliding_window_size: int = -1,
|
||||
is_cross_attention: bool = False,
|
||||
pos_encoding_mode: str = "NONE",
|
||||
logit_capping_method: str = "tanh",
|
||||
quant_config: Optional[QuantizationConfig] = None,
|
||||
attn_type: AttentionType = AttentionType.DECODER,
|
||||
use_irope: bool = False,
|
||||
@@ -81,6 +83,10 @@ class RadixAttention(nn.Module):
|
||||
self.quant_method.create_weights(self)
|
||||
self.attn_type = attn_type
|
||||
|
||||
self.pos_encoding_mode = pos_encoding_mode
|
||||
self.logit_capping_method = logit_capping_method
|
||||
self.xai_temperature_len = -1
|
||||
|
||||
def forward(
|
||||
self,
|
||||
q,
|
||||
|
||||
Reference in New Issue
Block a user