Optimize qwen2_vl and qwen2_5_vl (#701)
### What this PR does / why we need it? Optimize qwen2_vl and qwen2_5_vl. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? Tested this PR on a 1080p picture with tp=1, bs=1 on Qwen2-VL and Qwen2.5-VL: each FA op's duration dropped from 11 ms to 9 ms, a roughly 22% perf boost. --------- Signed-off-by: zouyida2052 <zouyida@huawei.com> Signed-off-by: zouyida2052 <zouyida2002@gmail.com> Co-authored-by: zouyida2052 <zouyida@huawei.com>
This commit is contained in:
@@ -16,7 +16,7 @@
|
||||
#
|
||||
|
||||
import torch
|
||||
from vllm.model_executor.layers.activation import SiluAndMul
|
||||
from vllm.model_executor.layers.activation import QuickGELU, SiluAndMul
|
||||
|
||||
|
||||
def silu_and_mul_forward_oot(self, x: torch.Tensor) -> torch.Tensor:
|
||||
@@ -26,4 +26,12 @@ def silu_and_mul_forward_oot(self, x: torch.Tensor) -> torch.Tensor:
|
||||
return out
|
||||
|
||||
|
||||
# Register the NPU-specific out-of-tree forward so vLLM's SiluAndMul
# dispatches to it instead of the reference implementation.
SiluAndMul.forward_oot = silu_and_mul_forward_oot
|
||||
def quick_gelu_forward_oot(self, x: torch.Tensor) -> torch.Tensor:
    """Out-of-tree QuickGELU forward backed by the Ascend NPU fused kernel.

    Intended to be bound as ``QuickGELU.forward_oot`` so vLLM dispatches
    to ``torch_npu.npu_fast_gelu`` instead of the reference implementation.

    Args:
        x: Input activation tensor.

    Returns:
        Tensor with fast-GELU applied elementwise.
    """
    # Local import: torch_npu is only present on Ascend NPU hosts, so keep
    # it out of module import time (matches the style of the SiluAndMul
    # override in this file).
    import torch_npu

    # NOTE: original annotated the parameter as ``torch.tensor`` (the
    # factory function); ``torch.Tensor`` is the correct type.
    out = torch_npu.npu_fast_gelu(x)
    return out
|
||||
|
||||
|
||||
# Bind the NPU-optimized out-of-tree forwards onto vLLM's activation
# layers; vLLM calls forward_oot on out-of-tree platforms.
QuickGELU.forward_oot = quick_gelu_forward_oot
SiluAndMul.forward_oot = silu_and_mul_forward_oot
|
||||
Reference in New Issue
Block a user