[Bugfix] Modify the enable scope of the _merge_multimodal_embeddings patch (#3360)
### What this PR does / why we need it?
Moves the `_merge_multimodal_embeddings` patch from the platform patch set to the worker patch set. The patch was previously only applied during offline inference: for online serving, the worker runs in a sub-process that does not import the platform patches, so the patch never took effect there. Registering it under the worker patches ensures it is applied inside the worker sub-process as well.

### Does this PR introduce _any_ user-facing change?
None

### How was this patch tested?
- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: booker123456 <945658361@qq.com>
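A minimal sketch of the underlying problem, assuming the worker sub-process is started with the `spawn` method (function names here are stand-ins, not vLLM's actual API): a monkey patch applied only in the parent process is invisible to a spawned child, because the child re-imports all modules from scratch. The patch must therefore live in a module the worker itself imports.

```python
import multiprocessing as mp


def merge_embeddings():
    """Stand-in for the function being monkey-patched (hypothetical name)."""
    return "original"


def _patched_merge_embeddings():
    """Stand-in for the Ascend-side replacement (hypothetical name)."""
    return "patched"


def child_view(q):
    # A spawned child re-imports this module from scratch, so a
    # reassignment done only at runtime in the parent is not inherited.
    q.put(merge_embeddings())


def demo():
    global merge_embeddings
    # The parent applies the patch at runtime, after import...
    merge_embeddings = _patched_merge_embeddings
    ctx = mp.get_context("spawn")
    q = ctx.Queue()
    p = ctx.Process(target=child_view, args=(q,))
    p.start()
    child_result = q.get()
    p.join()
    # ...so the parent sees the patched function, the child the original.
    return merge_embeddings(), child_result


if __name__ == "__main__":
    parent, child = demo()
    print(parent, child)
```

This is why the import is moved under `vllm_ascend.patch.worker.patch_common`: modules there are imported inside the worker process itself, so the patch is applied where the function is actually called.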
```diff
@@ -56,7 +56,7 @@
 # Future Plan:
 # Find a better way to support tensor alignment for 310p without this patch.
 #
-# ** File: platform/patch_common/patch_multimodal_merge.py**
+# ** File: worker/patch_common/patch_multimodal_merge.py**
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # 1. `vllm.model_executor.models.utils._merge_multimodal_embeddings`
 # Why:
```
```diff
@@ -18,6 +18,5 @@
 import vllm_ascend.patch.platform.patch_common.patch_config  # noqa
 import vllm_ascend.patch.platform.patch_common.patch_distributed  # noqa
 import vllm_ascend.patch.platform.patch_common.patch_mamba_config  # noqa
-import vllm_ascend.patch.platform.patch_common.patch_multimodal_merge  # noqa
 import vllm_ascend.patch.worker.patch_common.patch_attention_selector  # noqa
 import vllm_ascend.patch.worker.patch_common.patch_attentionspec  # noqa
```
```diff
@@ -27,6 +27,7 @@ import vllm_ascend.patch.worker.patch_common.patch_attention_layer  # noqa
 import vllm_ascend.patch.worker.patch_common.patch_distributed  # noqa
 import vllm_ascend.patch.worker.patch_common.patch_logits  # noqa
 import vllm_ascend.patch.worker.patch_common.patch_weight_loader  # noqa
+import vllm_ascend.patch.worker.patch_common.patch_multimodal_merge  # noqa

 # TODO: revert me when triton import is fixed
 # import vllm_ascend.patch.worker.patch_common.patch_minicpm  # noqa
```