From 8c1a4dedf3486d1c65409b5350cc7c76ecec5a78 Mon Sep 17 00:00:00 2001 From: Peipei <51022443+booker123456@users.noreply.github.com> Date: Sat, 11 Oct 2025 08:37:07 +0800 Subject: [PATCH] [Bugfix]modify the enable range of _merge_multimodal_embeddings patch (#3360) ### What this PR does / why we need it? Modify the enable range of the _merge_multimodal_embeddings patch. The patch is currently registered as a platform patch, so it only takes effect for offline inference. For online serving, the model runs in a worker sub-process, and the patch is not enabled within that sub-process. This PR moves the patch from the platform patches to the worker patches so that it is also enabled inside the worker sub-process. ### Does this PR introduce _any_ user-facing change? None ### How was this patch tested? - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 Signed-off-by: booker123456 <945658361@qq.com> --- vllm_ascend/patch/__init__.py | 2 +- vllm_ascend/patch/platform/patch_common/__init__.py | 1 - vllm_ascend/patch/worker/patch_common/__init__.py | 1 + .../{platform => worker}/patch_common/patch_multimodal_merge.py | 0 4 files changed, 2 insertions(+), 2 deletions(-) rename vllm_ascend/patch/{platform => worker}/patch_common/patch_multimodal_merge.py (100%) diff --git a/vllm_ascend/patch/__init__.py b/vllm_ascend/patch/__init__.py index 7d0a232..bf02390 100644 --- a/vllm_ascend/patch/__init__.py +++ b/vllm_ascend/patch/__init__.py @@ -56,7 +56,7 @@ # Future Plan: # Find a better way to support tensor alignment for 310p without this patch. # -# ** File: platform/patch_common/patch_multimodal_merge.py** +# ** File: worker/patch_common/patch_multimodal_merge.py** # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # 1. 
`vllm.model_executor.models.utils._merge_multimodal_embeddings` # Why: diff --git a/vllm_ascend/patch/platform/patch_common/__init__.py b/vllm_ascend/patch/platform/patch_common/__init__.py index 7942ac0..11b5cae 100644 --- a/vllm_ascend/patch/platform/patch_common/__init__.py +++ b/vllm_ascend/patch/platform/patch_common/__init__.py @@ -18,6 +18,5 @@ import vllm_ascend.patch.platform.patch_common.patch_config # noqa import vllm_ascend.patch.platform.patch_common.patch_distributed # noqa import vllm_ascend.patch.platform.patch_common.patch_mamba_config # noqa -import vllm_ascend.patch.platform.patch_common.patch_multimodal_merge # noqa import vllm_ascend.patch.worker.patch_common.patch_attention_selector # noqa import vllm_ascend.patch.worker.patch_common.patch_attentionspec # noqa diff --git a/vllm_ascend/patch/worker/patch_common/__init__.py b/vllm_ascend/patch/worker/patch_common/__init__.py index 3d233c4..896411b 100644 --- a/vllm_ascend/patch/worker/patch_common/__init__.py +++ b/vllm_ascend/patch/worker/patch_common/__init__.py @@ -27,6 +27,7 @@ import vllm_ascend.patch.worker.patch_common.patch_attention_layer # noqa import vllm_ascend.patch.worker.patch_common.patch_distributed # noqa import vllm_ascend.patch.worker.patch_common.patch_logits # noqa import vllm_ascend.patch.worker.patch_common.patch_weight_loader # noqa +import vllm_ascend.patch.worker.patch_common.patch_multimodal_merge # noqa # TODO: revert me when triton import is fixed # import vllm_ascend.patch.worker.patch_common.patch_minicpm # noqa diff --git a/vllm_ascend/patch/platform/patch_common/patch_multimodal_merge.py b/vllm_ascend/patch/worker/patch_common/patch_multimodal_merge.py similarity index 100% rename from vllm_ascend/patch/platform/patch_common/patch_multimodal_merge.py rename to vllm_ascend/patch/worker/patch_common/patch_multimodal_merge.py