From d7e19ed57a8b666855720d68185e140094890167 Mon Sep 17 00:00:00 2001 From: whx <56632993+whx-sjtu@users.noreply.github.com> Date: Tue, 17 Jun 2025 23:14:25 +0800 Subject: [PATCH] [BugFix] fix length of sin/cos cache in rope (#1266) This PR fixes a bug where the sin/cos cache was constructed shorter than the model's max positional embedding. Closes: https://github.com/vllm-project/vllm-ascend/issues/1038 Signed-off-by: whx-sjtu <2952154980@qq.com> --- vllm_ascend/ops/rotary_embedding.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/ops/rotary_embedding.py b/vllm_ascend/ops/rotary_embedding.py index 9f8ae78..39a4c1c 100644 --- a/vllm_ascend/ops/rotary_embedding.py +++ b/vllm_ascend/ops/rotary_embedding.py @@ -219,7 +219,9 @@ def _set_cos_sin_cache(self, seq_len, device, dtype): inv_freq = freq_inter * (1 - inv_freq_mask) + freq_extra * inv_freq_mask self.register_buffer("inv_freq", inv_freq, persistent=False) - t = torch.arange(seq_len, device=device, dtype=torch.float32) + t = torch.arange(seq_len * self.scaling_factor, + device=device, + dtype=torch.float32) freqs = torch.outer(t, inv_freq) cos_cached = torch.cat([freqs, freqs], dim=-1).cos() * self.mscale