diff --git a/vllm_ascend/ops/rotary_embedding.py b/vllm_ascend/ops/rotary_embedding.py index 0dbe940..0c2a00a 100644 --- a/vllm_ascend/ops/rotary_embedding.py +++ b/vllm_ascend/ops/rotary_embedding.py @@ -82,7 +82,7 @@ def native_rope_deepseek_forward(self, offsets: Optional[torch.Tensor] = None, max_seq_len: Optional[int] = None): if max_seq_len is not None and max_seq_len > self.max_seq_len: - self._set_cos_sin_cache(max_seq_len, query.device, query.dtype) + _set_cos_sin_cache(self, max_seq_len, query.device, query.dtype) if len(key.shape) == 2: key = key[:, None, :] # Note: we implement the non neox_style method with shuffle the last dim and neox style