Fix RotaryEmbedding when using Triton backend for EXAONE-3.5-2.4B (#4064)

This commit is contained in:
Kyungmin Lee
2025-03-24 09:58:12 +09:00
committed by GitHub
parent 4d25305700
commit 2a206b22ed

View File

@@ -148,7 +148,7 @@ class RotaryEmbedding(CustomOp):
key: torch.Tensor,
offsets: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, torch.Tensor]:
- if _is_cuda_available:
+ if _is_cuda_available and (self.head_size in [64, 128, 256, 512]):
apply_rope_with_cos_sin_cache_inplace(
positions=positions,
query=query,