diff --git a/vllm_ascend/models/qwen2_5_vl.py b/vllm_ascend/models/qwen2_5_vl.py
index 2d89d95..d1a94d1 100644
--- a/vllm_ascend/models/qwen2_5_vl.py
+++ b/vllm_ascend/models/qwen2_5_vl.py
@@ -107,7 +107,7 @@ class AscendQwen2_5_VisionAttention(Qwen2_5_VisionAttention):
             for x in (q, k, v)
         ]
 
-        context_layer = torch.torch.empty_like(q)
+        context_layer = torch.empty_like(q)
 
         # operator requires pta version >= 2.5.1
         torch_npu._npu_flash_attention_unpad(
diff --git a/vllm_ascend/models/qwen2_5_vl_without_padding.py b/vllm_ascend/models/qwen2_5_vl_without_padding.py
index 291b047..47ddd44 100644
--- a/vllm_ascend/models/qwen2_5_vl_without_padding.py
+++ b/vllm_ascend/models/qwen2_5_vl_without_padding.py
@@ -87,7 +87,7 @@ class AscendQwen2_5_VisionAttention_Without_Padding(Qwen2_5_VisionAttention):
             for x in (q, k, v)
         ]
 
-        context_layer = torch.torch.empty_like(q)
+        context_layer = torch.empty_like(q)
 
         # operator requires pta version >= 2.5.1.dev20250226
         torch_npu._npu_flash_attention_unpad(
diff --git a/vllm_ascend/models/qwen2_vl.py b/vllm_ascend/models/qwen2_vl.py
index 9d049e3..a677b06 100644
--- a/vllm_ascend/models/qwen2_vl.py
+++ b/vllm_ascend/models/qwen2_vl.py
@@ -95,7 +95,7 @@ class AscendQwen2VisionAttention(Qwen2VisionAttention):
             for x in (q, k, v)
         ]
 
-        context_layer = torch.torch.empty_like(q)
+        context_layer = torch.empty_like(q)
 
         # operator requires pta version >= 2.5.1
         torch_npu._npu_flash_attention_unpad(