From ee2ed573f15b0e4e6aaad5527c9419eb3f9b3acc Mon Sep 17 00:00:00 2001
From: whx <56632993+whx-sjtu@users.noreply.github.com>
Date: Fri, 9 Jan 2026 14:11:44 +0800
Subject: [PATCH] [BugFix][DS 3.2] Fix ds indexer accuracy problem caused by
 rope. (#4641)

### What this PR does / why we need it?
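The rotary embedding in the DeepSeek indexer should be NeoX-style instead
of GPT-J-style. PR #4413 fixed this accuracy bug with a new Triton kernel;
this PR fixes the original PyTorch version by replacing
`torch_npu.npu_interleave_rope` with `torch_npu.npu_rotary_mul` for both
`q_pe` and `k_pe`.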

### Does this PR introduce _any_ user-facing change?
None

### How was this patch tested?
CI passed with the existing tests.


- vLLM version: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
- vLLM main:
https://github.com/vllm-project/vllm/commit/86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24

Signed-off-by: whx-sjtu <2952154980@qq.com>
---
 vllm_ascend/attention/sfa_v1.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm_ascend/attention/sfa_v1.py b/vllm_ascend/attention/sfa_v1.py
index c4a2a51e..d3b5b4b2 100644
--- a/vllm_ascend/attention/sfa_v1.py
+++ b/vllm_ascend/attention/sfa_v1.py
@@ -882,7 +882,7 @@ class AscendSFAImpl(MLAAttentionImpl):
                 dim=-1)  # [b,s,64,64+64]
 
             q_pe = q_pe.unsqueeze(2)
-            q_pe = torch_npu.npu_interleave_rope(q_pe, cos_q, sin_q)
+            q_pe = torch_npu.npu_rotary_mul(q_pe, cos_q, sin_q)
             q_pe = q_pe.squeeze(2)
             q = torch.cat([q_pe, q_nope], dim=-1)  # [b*s,64,128]
 
@@ -892,7 +892,7 @@ class AscendSFAImpl(MLAAttentionImpl):
                 dim=-1)  # [b,s,64+64]
 
             k_pe = k_pe.unsqueeze(2)
-            k_pe = torch_npu.npu_interleave_rope(k_pe, cos, sin)
+            k_pe = torch_npu.npu_rotary_mul(k_pe, cos, sin)
             k_pe = k_pe.squeeze(2)
 
             k = torch.cat([k_pe, k_nope], dim=-1)  # [b*s,128]