From cfdd45ed00ab9c344f3aacb74f3521eba7671675 Mon Sep 17 00:00:00 2001
From: JohnJan
Date: Thu, 24 Jul 2025 20:16:00 +0800
Subject: [PATCH] [Bug] Fix duplicate 'torch.' prefix in qwen-vl (#1986)

Signed-off-by: wuzhongjian

### What this PR does / why we need it?
Fix duplicate 'torch.' prefix in qwen2-vl, qwen2.5-vl

- vLLM version: v0.9.2
- vLLM main: https://github.com/vllm-project/vllm/commit/dde295a9342fa2b1a3f6c4886706694a53f7b97d

---
 vllm_ascend/models/qwen2_5_vl.py                 | 2 +-
 vllm_ascend/models/qwen2_5_vl_without_padding.py | 2 +-
 vllm_ascend/models/qwen2_vl.py                   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm_ascend/models/qwen2_5_vl.py b/vllm_ascend/models/qwen2_5_vl.py
index 2d89d95..d1a94d1 100644
--- a/vllm_ascend/models/qwen2_5_vl.py
+++ b/vllm_ascend/models/qwen2_5_vl.py
@@ -107,7 +107,7 @@ class AscendQwen2_5_VisionAttention(Qwen2_5_VisionAttention):
             for x in (q, k, v)
         ]
 
-        context_layer = torch.torch.empty_like(q)
+        context_layer = torch.empty_like(q)
 
         # operator requires pta version >= 2.5.1
         torch_npu._npu_flash_attention_unpad(
diff --git a/vllm_ascend/models/qwen2_5_vl_without_padding.py b/vllm_ascend/models/qwen2_5_vl_without_padding.py
index 291b047..47ddd44 100644
--- a/vllm_ascend/models/qwen2_5_vl_without_padding.py
+++ b/vllm_ascend/models/qwen2_5_vl_without_padding.py
@@ -87,7 +87,7 @@ class AscendQwen2_5_VisionAttention_Without_Padding(Qwen2_5_VisionAttention):
             for x in (q, k, v)
         ]
 
-        context_layer = torch.torch.empty_like(q)
+        context_layer = torch.empty_like(q)
 
         # operator requires pta version >= 2.5.1.dev20250226
         torch_npu._npu_flash_attention_unpad(
diff --git a/vllm_ascend/models/qwen2_vl.py b/vllm_ascend/models/qwen2_vl.py
index 9d049e3..a677b06 100644
--- a/vllm_ascend/models/qwen2_vl.py
+++ b/vllm_ascend/models/qwen2_vl.py
@@ -95,7 +95,7 @@ class AscendQwen2VisionAttention(Qwen2VisionAttention):
             for x in (q, k, v)
         ]
 
-        context_layer = torch.torch.empty_like(q)
+        context_layer = torch.empty_like(q)
 
         # operator requires pta version >= 2.5.1
         torch_npu._npu_flash_attention_unpad(