From d86021f7b4f86be6f853b03da82628b4d0d6ea4b Mon Sep 17 00:00:00 2001 From: Debonet <37174444+Debonex@users.noreply.github.com> Date: Mon, 5 Jan 2026 20:12:41 +0800 Subject: [PATCH] [Bugfix] record cos and sin cache in AscendRotaryEmbedding (#5516) ### What this PR does / why we need it? In scenarios where models like [Moonlight](https://modelscope.cn/models/moonshotai/Moonlight-16B-A3B-Instruct) (using MLA but without `rope_scaling` in config.json) invoke `AscendRotaryEmbedding`, `_cos_cache` and `_sin_cache` are not recorded correctly. This PR adds a helper that splits `cos_sin_cache` into `_cos_cache` and `_sin_cache` (only if neither has been recorded yet) and calls it from `AscendRotaryEmbedding.__init__`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/45c1ca1ca1ee8fa06df263c8715e8a412ff408d4 Signed-off-by: Debonex <719893090@qq.com> --- vllm_ascend/ops/rotary_embedding.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/vllm_ascend/ops/rotary_embedding.py b/vllm_ascend/ops/rotary_embedding.py index 63aa3e28..28bde074 100644 --- a/vllm_ascend/ops/rotary_embedding.py +++ b/vllm_ascend/ops/rotary_embedding.py @@ -129,6 +129,18 @@ def _record_cos_and_sin_cache(cos_cache, sin_cache): _sin_cache = sin_cache +def _record_cos_and_sin_cache_interleaved(cos_sin_cache): + global _cos_cache + global _sin_cache + if _cos_cache is not None or _sin_cache is not None: + return + hidden_dim = cos_sin_cache.shape[-1] // 2 + cos_cache, sin_cache = cos_sin_cache.view(-1, 2, hidden_dim).repeat( + 1, 1, 2).chunk(2, dim=1) + _cos_cache = cos_cache.squeeze(1) + _sin_cache = sin_cache.squeeze(1) + + def update_cos_sin(positions): global _cos global _sin @@ -252,6 +264,7 @@ class AscendRotaryEmbedding(RotaryEmbedding): super().__init__(head_size, rotary_dim, max_position_embeddings, base, is_neox_style, dtype) _record_cos_sin_cache(self.cos_sin_cache) + _record_cos_and_sin_cache_interleaved(self.cos_sin_cache) def forward_oot( self,