From 2a206b22eda88bbeddb531a840dab6615deb414e Mon Sep 17 00:00:00 2001 From: Kyungmin Lee <30465912+lkm2835@users.noreply.github.com> Date: Mon, 24 Mar 2025 09:58:12 +0900 Subject: [PATCH] Fix RotaryEmbedding when using Triton backend for EXAONE-3.5-2.4B (#4064) --- python/sglang/srt/layers/rotary_embedding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/layers/rotary_embedding.py b/python/sglang/srt/layers/rotary_embedding.py index cbd8beb66..fb6bdd76b 100644 --- a/python/sglang/srt/layers/rotary_embedding.py +++ b/python/sglang/srt/layers/rotary_embedding.py @@ -148,7 +148,7 @@ class RotaryEmbedding(CustomOp): key: torch.Tensor, offsets: Optional[torch.Tensor] = None, ) -> Tuple[torch.Tensor, torch.Tensor]: - if _is_cuda_available: + if _is_cuda_available and (self.head_size in [64, 128, 256, 512]): apply_rope_with_cos_sin_cache_inplace( positions=positions, query=query,