From d7e19ed57a8b666855720d68185e140094890167 Mon Sep 17 00:00:00 2001 From: whx <56632993+whx-sjtu@users.noreply.github.com> Date: Tue, 17 Jun 2025 23:14:25 +0800 Subject: [PATCH] [BugFix] fix length of sin/cos cache in rope (#1266) This PR fixes a bug where the sin/cos cache was constructed shorter than the model's max positional embedding. Closes: https://github.com/vllm-project/vllm-ascend/issues/1038 Signed-off-by: whx-sjtu <2952154980@qq.com> --- vllm_ascend/ops/rotary_embedding.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/ops/rotary_embedding.py b/vllm_ascend/ops/rotary_embedding.py index 9f8ae78..39a4c1c 100644 --- a/vllm_ascend/ops/rotary_embedding.py +++ b/vllm_ascend/ops/rotary_embedding.py @@ -219,7 +219,9 @@ def _set_cos_sin_cache(self, seq_len, device, dtype): inv_freq = freq_inter * (1 - inv_freq_mask) + freq_extra * inv_freq_mask self.register_buffer("inv_freq", inv_freq, persistent=False) - t = torch.arange(seq_len, device=device, dtype=torch.float32) + t = torch.arange(seq_len * self.scaling_factor, + device=device, + dtype=torch.float32) freqs = torch.outer(t, inv_freq) cos_cached = torch.cat([freqs, freqs], dim=-1).cos() * self.mscale