Optimize qwen2_vl and qwen2_5_vl (#701)

### What this PR does / why we need it?
Optimize qwen2_vl and qwen2_5_vl.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Tested this PR on a 1080p picture with tp=1, bs=1 on Qwen2-VL and Qwen2.5-VL: the duration of each FA op dropped from 11 ms to 9 ms, a roughly 22% performance boost.

---------

Signed-off-by: zouyida2052 <zouyida@huawei.com>
Signed-off-by: zouyida2052 <zouyida2002@gmail.com>
Co-authored-by: zouyida2052 <zouyida@huawei.com>
2025-04-30 14:22:38 +08:00
parent 90aabaeb2e
commit ba9714ccee
4 changed files with 559 additions and 27 deletions
--- a/vllm_ascend/models/__init__.py
+++ b/vllm_ascend/models/__init__.py
@@ -5,7 +5,9 @@ def register_model():
    from .deepseek_mtp import CustomDeepSeekMTP  # noqa: F401
    from .deepseek_v2 import CustomDeepseekV2ForCausalLM  # noqa: F401
    from .deepseek_v2 import CustomDeepseekV3ForCausalLM  # noqa: F401
-    from .qwen2_vl import CustomQwen2VLForConditionalGeneration  # noqa: F401
+    from .qwen2_5_vl import \
+        AscendQwen2_5_VLForConditionalGeneration  # noqa: F401
+    from .qwen2_vl import AscendQwen2VLForConditionalGeneration  # noqa: F401

    ModelRegistry.register_model(
        "DeepSeekMTPModel",
@@ -13,7 +15,12 @@ def register_model():

    ModelRegistry.register_model(
        "Qwen2VLForConditionalGeneration",
-        "vllm_ascend.models.qwen2_vl:CustomQwen2VLForConditionalGeneration")
+        "vllm_ascend.models.qwen2_vl:AscendQwen2VLForConditionalGeneration")
+
+    ModelRegistry.register_model(
+        "Qwen2_5_VLForConditionalGeneration",
+        "vllm_ascend.models.qwen2_5_vl:AscendQwen2_5_VLForConditionalGeneration"
+    )

    ModelRegistry.register_model(
        "DeepseekV2ForCausalLM",