Update grok.py and tiktoken tokenizer (#9532)

2025-08-23 05:40:18 -07:00
parent 83871aa12d
commit 86d10d220f
10 changed files with 732 additions and 64 deletions
--- a/python/sglang/srt/layers/radix_attention.py
+++ b/python/sglang/srt/layers/radix_attention.py
@@ -52,6 +52,8 @@ class RadixAttention(nn.Module):
        v_head_dim: int = -1,
        sliding_window_size: int = -1,
        is_cross_attention: bool = False,
+        pos_encoding_mode: str = "NONE",
+        logit_capping_method: str = "tanh",
        quant_config: Optional[QuantizationConfig] = None,
        attn_type: AttentionType = AttentionType.DECODER,
        use_irope: bool = False,
@@ -81,6 +83,10 @@ class RadixAttention(nn.Module):
            self.quant_method.create_weights(self)
        self.attn_type = attn_type

+        self.pos_encoding_mode = pos_encoding_mode
+        self.logit_capping_method = logit_capping_method
+        self.xai_temperature_len = -1
+
    def forward(
        self,
        q,