From 9766cf91288912f6abbfdf3009c27006a7144540 Mon Sep 17 00:00:00 2001
From: shaopeng-666 <lishaopeng21@huawei.com>
Date: Mon, 8 Dec 2025 19:19:17 +0800
Subject: [PATCH] fix qwen3vl mrope op (#4484)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What this PR does / why we need it?
Qwen2.5-VL mrope precision problem would been solved once this pr is
merged
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Test on G8600 with textVQA dataset

- vLLM version: v0.11.2
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2

---------

Signed-off-by: 李少鹏 <lishaopeng21@huawei.com>
Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com>
---
 vllm_ascend/ops/rotary_embedding.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/vllm_ascend/ops/rotary_embedding.py b/vllm_ascend/ops/rotary_embedding.py
index ee9dd9f9..71b0411c 100644
--- a/vllm_ascend/ops/rotary_embedding.py
+++ b/vllm_ascend/ops/rotary_embedding.py
@@ -24,7 +24,6 @@ from vllm.forward_context import get_forward_context
 from vllm.model_executor.layers.rotary_embedding import (
     DeepseekScalingRotaryEmbedding, MRotaryEmbedding, RotaryEmbedding,
     YaRNScalingRotaryEmbedding)
-from vllm.platforms import CpuArchEnum
 
 from vllm_ascend.platform import NPUPlatform
 from vllm_ascend.utils import (AscendDeviceType, enable_custom_op,
@@ -411,10 +410,7 @@ class AscendMRotaryEmbedding(MRotaryEmbedding):
         query: torch.Tensor,
         key: torch.Tensor,
     ):
-        # TODO: This judgment will be removed once the mrope precision issue is fixed
-        if self.mrope_section != [
-                16, 24, 24
-        ] or NPUPlatform.get_cpu_architecture() == CpuArchEnum.X86:
+        if self.mrope_section != [16, 24, 24]:
             return super().forward_oot(positions, query, key)
 
         import torch_npu
@@ -429,7 +425,7 @@ class AscendMRotaryEmbedding(MRotaryEmbedding):
             self.cos_sin_cache = self.cos_sin_cache.to(  # type: ignore
                 query.dtype)  # type: ignore
 
-        query, key = torch_npu.npu_mrope(positions,
+        query, key = torch_npu.npu_mrope(positions.contiguous(),
                                          query.contiguous(),
                                          key.contiguous(),
                                          self.cos_sin_cache.contiguous(),