Update grok.py and tiktoken tokenizer (#9532)

This commit is contained in:
Lianmin Zheng
2025-08-23 05:40:18 -07:00
committed by GitHub
parent 83871aa12d
commit 86d10d220f
10 changed files with 732 additions and 64 deletions

View File

@@ -52,6 +52,8 @@ class RadixAttention(nn.Module):
v_head_dim: int = -1,
sliding_window_size: int = -1,
is_cross_attention: bool = False,
pos_encoding_mode: str = "NONE",
logit_capping_method: str = "tanh",
quant_config: Optional[QuantizationConfig] = None,
attn_type: AttentionType = AttentionType.DECODER,
use_irope: bool = False,
@@ -81,6 +83,10 @@ class RadixAttention(nn.Module):
self.quant_method.create_weights(self)
self.attn_type = attn_type
self.pos_encoding_mode = pos_encoding_mode
self.logit_capping_method = logit_capping_method
self.xai_temperature_len = -1
def forward(
self,
q,