From c1392a6ce6c727db9ae10c340321ce70b46e74a7 Mon Sep 17 00:00:00 2001 From: rjg-lyh <83491835+rjg-lyh@users.noreply.github.com> Date: Wed, 18 Mar 2026 14:20:21 +0800 Subject: [PATCH] [bugfix][accuracy] Fix ds indexer accuracy problem caused by k rope (#7341) ### What this PR does / why we need it? The rotary algorithm in the DeepSeek indexer should be NeoX-style instead of GPT-J-style. PR #4641 fixed this accuracy bug in the original PyTorch version, but PR #5701 accidentally removed the fixed code line and reverted the implementation back to the problematic version. This PR fixes it. Signed-off-by: rjg-lyh <1318825571@qq.com> --- vllm_ascend/attention/sfa_v1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_ascend/attention/sfa_v1.py b/vllm_ascend/attention/sfa_v1.py index 5d65ede3..da708060 100644 --- a/vllm_ascend/attention/sfa_v1.py +++ b/vllm_ascend/attention/sfa_v1.py @@ -884,7 +884,7 @@ class AscendSFAImpl(MLAAttentionImpl): sin = sin.view(-1, 1, 1, self.qk_rope_head_dim) k_li_pe = k_li_pe.unsqueeze(2) - k_li_pe = torch_npu.npu_interleave_rope(k_li_pe, cos, sin) + k_li_pe = torch_npu.npu_rotary_mul(k_li_pe, cos, sin) k_li_pe = k_li_pe.squeeze(2) k_li = torch.cat([k_li_pe, k_li_nope], dim=-1) # [b*s,128]