Optimize qwen2_vl and qwen2_5_vl (#701)

### What this PR does / why we need it?
Optimize qwen2_vl and qwen2_5_vl.

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
Tested this PR on a 1080p image with tp=1, bs=1 on Qwen2-VL and
Qwen2.5-VL: the duration of each FA op dropped from 11 ms to 9 ms,
roughly a 22% performance boost.

---------

Signed-off-by: zouyida2052 <zouyida@huawei.com>
Signed-off-by: zouyida2052 <zouyida2002@gmail.com>
Co-authored-by: zouyida2052 <zouyida@huawei.com>
This commit is contained in:
zouyida2052
2025-04-30 14:22:38 +08:00
committed by GitHub
parent 90aabaeb2e
commit ba9714ccee
4 changed files with 559 additions and 27 deletions

View File

@@ -5,7 +5,9 @@ def register_model():
from .deepseek_mtp import CustomDeepSeekMTP # noqa: F401
from .deepseek_v2 import CustomDeepseekV2ForCausalLM # noqa: F401
from .deepseek_v2 import CustomDeepseekV3ForCausalLM # noqa: F401
from .qwen2_vl import CustomQwen2VLForConditionalGeneration # noqa: F401
from .qwen2_5_vl import \
AscendQwen2_5_VLForConditionalGeneration # noqa: F401
from .qwen2_vl import AscendQwen2VLForConditionalGeneration # noqa: F401
ModelRegistry.register_model(
"DeepSeekMTPModel",
@@ -13,7 +15,12 @@ def register_model():
ModelRegistry.register_model(
"Qwen2VLForConditionalGeneration",
"vllm_ascend.models.qwen2_vl:CustomQwen2VLForConditionalGeneration")
"vllm_ascend.models.qwen2_vl:AscendQwen2VLForConditionalGeneration")
ModelRegistry.register_model(
"Qwen2_5_VLForConditionalGeneration",
"vllm_ascend.models.qwen2_5_vl:AscendQwen2_5_VLForConditionalGeneration"
)
ModelRegistry.register_model(
"DeepseekV2ForCausalLM",