[Bug] Fix duplicate 'torch.' prefix in qwen-vl (#1986)
Signed-off-by: wuzhongjian <wuzhongjian_yewu@cmss.chinamobile.com>
### What this PR does / why we need it?
Fix duplicate 'torch.' prefix in qwen2-vl, qwen2.5-vl
- vLLM version: v0.9.2
- vLLM main:
dde295a934
This commit is contained in:
@@ -107,7 +107,7 @@ class AscendQwen2_5_VisionAttention(Qwen2_5_VisionAttention):
             for x in (q, k, v)
         ]
-        context_layer = torch.torch.empty_like(q)
+        context_layer = torch.empty_like(q)
         # operator requires pta version >= 2.5.1
         torch_npu._npu_flash_attention_unpad(
|||||||
@@ -87,7 +87,7 @@ class AscendQwen2_5_VisionAttention_Without_Padding(Qwen2_5_VisionAttention):
             for x in (q, k, v)
         ]
-        context_layer = torch.torch.empty_like(q)
+        context_layer = torch.empty_like(q)
         # operator requires pta version >= 2.5.1.dev20250226
         torch_npu._npu_flash_attention_unpad(
|||||||
@@ -95,7 +95,7 @@ class AscendQwen2VisionAttention(Qwen2VisionAttention):
            for x in (q, k, v)
        ]
-        context_layer = torch.torch.empty_like(q)
+        context_layer = torch.empty_like(q)
        # operator requires pta version >= 2.5.1
        torch_npu._npu_flash_attention_unpad(
|||||||
Reference in New Issue
Block a user