[0.11.0][Perf] Add padding vision tower for Qwen2_5_Omni (#4041)

### What this PR does / why we need it?
This PR replaces the vision tower in the Qwen2.5-Omni-Thinker model,
Qwen2_5_VisionTransformer, with AscendQwen2_5_VisionTransformer, which
uses QKV padding for better performance.

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: Ting FU <futing10@huawei.com>
This commit is contained in:
tingfu
2025-11-08 13:56:05 +08:00
committed by GitHub
parent d4e2a44307
commit f9842560cb
3 changed files with 66 additions and 0 deletions

View File

@@ -349,6 +349,9 @@ class AscendQwen2_5_VisionTransformer(Qwen2_5_VisionTransformer):
("qkv_proj", "q_proj", "q"),
("qkv_proj", "k_proj", "k"),
("qkv_proj", "v_proj", "v"),
("attn.qkv.", "attn.q.", "q"),
("attn.qkv.", "attn.k.", "k"),
("attn.qkv.", "attn.v.", "v"),
("mlp.gate_up_proj.", "mlp.gate_proj.", 0),
("mlp.gate_up_proj.", "mlp.up_proj.", 1),
]
@@ -363,6 +366,11 @@ class AscendQwen2_5_VisionTransformer(Qwen2_5_VisionTransformer):
param = params_dict[name]
weight_loader = param.weight_loader
weight_loader(param, loaded_weight, shard_id)
if self.enable_pad and shard_id == "v":
if "attn.qkv.weight" in name:
param.data = self.pad_qkv_weight(param.data)
if "attn.qkv.bias" in name:
param.data = self.pad_qkv_bias(param.data)
break
else:
param = params_dict[name]