From 778cb7255697ad0d1562f60d0cf4ef68542a5b94 Mon Sep 17 00:00:00 2001
From: realliujiaxu <72550220+realliujiaxu@users.noreply.github.com>
Date: Fri, 12 Sep 2025 09:49:36 +0800
Subject: [PATCH] fix bug when rotary_dim is not 128 (#2847)

### What this PR does / why we need it?
`torch_npu.npu_apply_rotary_pos_emb` only supports head_size and rotary_dim equal to 128. An error occurs when running GLM, whose rotary_dim is not 128.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?

- vLLM version: main
- vLLM main: https://github.com/vllm-project/vllm/commit/404c85ca7290d314bbbf4d130bb55becc437c4c2

Signed-off-by: realliujiaxu
---
 vllm_ascend/ops/rotary_embedding.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm_ascend/ops/rotary_embedding.py b/vllm_ascend/ops/rotary_embedding.py
index ea47c04..4b76dce 100644
--- a/vllm_ascend/ops/rotary_embedding.py
+++ b/vllm_ascend/ops/rotary_embedding.py
@@ -138,8 +138,8 @@ class AscendRotaryEmbedding(RotaryEmbedding):
         forward_context = get_forward_context()
         is_first_layer = forward_context.is_first_layer
         # Generate cos and sin outside layers to avoid repeated calculation.
-        if is_neox_style and \
-            self.head_size == 128:
+        if is_neox_style and self.head_size == 128 and self.cos_sin_cache.shape[
+                -1] == 128:
             if is_first_layer:
                 cos_sin = self.cos_sin_cache.index_select(0, positions)
                 last_dim = cos_sin.size()[-1]