[Bugfix] record cos and sin cache in AscendRotaryEmbedding (#5516)

### What this PR does / why we need it?

In scenarios where models like
[Moonlight](https://modelscope.cn/models/moonshotai/Moonlight-16B-A3B-Instruct)
(using MLA but without `rope_scaling` in config.json) invoke
`AscendRotaryEmbedding`, `_cos_cache` and `_sin_cache` are not recorded
correctly.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

- vLLM version: v0.13.0
- vLLM main:
45c1ca1ca1

Signed-off-by: Debonex <719893090@qq.com>
This commit is contained in:
Debonet
2026-01-05 20:12:41 +08:00
committed by GitHub
parent 16b1bee804
commit d86021f7b4

View File

@@ -129,6 +129,18 @@ def _record_cos_and_sin_cache(cos_cache, sin_cache):
_sin_cache = sin_cache
def _record_cos_and_sin_cache_interleaved(cos_sin_cache):
    """Populate the module-level cos/sin caches from a fused cache tensor.

    Each row of ``cos_sin_cache`` is split into two equal halves along the
    last dimension; the first half is recorded as the cos cache and the
    second half as the sin cache. Each half is tiled twice along its last
    dimension before being stored, so the recorded caches have the same
    last-dimension size as the input.

    The call is a no-op when either ``_cos_cache`` or ``_sin_cache`` has
    already been set.

    Args:
        cos_sin_cache: Fused tensor whose last dimension holds the two
            halves back to back. Presumably laid out as
            ``[cos | sin]`` per position -- confirm against the producer.
    """
    global _cos_cache
    global _sin_cache

    # Only the first recording wins; later calls must not overwrite it.
    if not (_cos_cache is None and _sin_cache is None):
        return

    half_width = cos_sin_cache.shape[-1] // 2

    # (rows, 2, half_width): index 0 on dim 1 is the first half of each
    # row, index 1 is the second half.
    halves = cos_sin_cache.view(-1, 2, half_width)
    # Tile each half twice along the last dim -> (rows, 2, 2 * half_width).
    tiled = halves.repeat(1, 1, 2)
    cos_part, sin_part = tiled.chunk(2, dim=1)

    # Drop the singleton dim left behind by chunk().
    _cos_cache = cos_part.squeeze(1)
    _sin_cache = sin_part.squeeze(1)
def update_cos_sin(positions):
global _cos
global _sin
@@ -252,6 +264,7 @@ class AscendRotaryEmbedding(RotaryEmbedding):
super().__init__(head_size, rotary_dim, max_position_embeddings, base,
is_neox_style, dtype)
_record_cos_sin_cache(self.cos_sin_cache)
_record_cos_and_sin_cache_interleaved(self.cos_sin_cache)
def forward_oot(
self,