[Bugfix] Modify the enable range of the _merge_multimodal_embeddings patch (#3360)
### What this PR does / why we need it?
Widen the scope in which the `_merge_multimodal_embeddings` patch is enabled. The patch is currently applied only on the platform side, so it only takes effect for offline inference; for online serving, the model runs inside a worker subprocess, and the patch is never applied within that subprocess.

### Does this PR introduce _any_ user-facing change?
None.

### How was this patch tested?
- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: booker123456 <945658361@qq.com>
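The underlying issue is a general property of Python monkey patches: rebinding a function on an imported module only affects the process that performs the rebinding, so a patch applied on the platform side does not automatically exist inside a separately started worker subprocess. Below is a minimal, self-contained sketch of that behaviour under the `spawn` start method; it is illustrative only (it uses `json.dumps` as a stand-in for the patched vLLM helper and is not vllm-ascend code):

```python
import json               # stand-in for the patched third-party module
import multiprocessing as mp

_original_dumps = json.dumps


def _patched_dumps(obj, **kwargs):
    # Marker so we can see whether the replacement is active in a given process.
    return "patched:" + _original_dumps(obj, **kwargs)


def apply_patch():
    # Comparable to importing a patch module for its side effect:
    # rebind the target function on the library module.
    json.dumps = _patched_dumps


def worker():
    # Runs in the spawned subprocess; apply_patch() was never called here,
    # so the unpatched function is used.
    print("worker sees:", json.dumps({"a": 1}))       # -> {"a": 1}


if __name__ == "__main__":
    apply_patch()
    print("parent sees:", json.dumps({"a": 1}))       # -> patched:{"a": 1}

    ctx = mp.get_context("spawn")
    p = ctx.Process(target=worker)
    p.start()
    p.join()
```

The fix is therefore to apply the patch from the worker-side patch package, so that importing the worker patches inside the subprocess also activates it.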
@@ -18,6 +18,5 @@
import vllm_ascend.patch.platform.patch_common.patch_config # noqa
import vllm_ascend.patch.platform.patch_common.patch_distributed # noqa
import vllm_ascend.patch.platform.patch_common.patch_mamba_config # noqa
import vllm_ascend.patch.platform.patch_common.patch_multimodal_merge # noqa
import vllm_ascend.patch.worker.patch_common.patch_attention_selector # noqa
import vllm_ascend.patch.worker.patch_common.patch_attentionspec # noqa
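The hunk above drops one line from the platform-side patch imports (6 lines become 5), consistent with removing `patch_multimodal_merge` from the platform scope. The added side of the diff is not included in this excerpt, but presumably the same module is re-registered under the worker-side patch package so it is imported inside the worker subprocess as well. A hypothetical sketch of that registration (the exact path is an assumption, not taken from this diff):

```python
# Hypothetical worker-side registration; the actual added hunk is not shown here.
import vllm_ascend.patch.worker.patch_common.patch_multimodal_merge  # noqa
```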
@@ -1,58 +0,0 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# Copyright 2023 The vLLM team.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.

import torch
import vllm
from vllm.model_executor.models.utils import (_embedding_count_expression,
                                              _flatten_embeddings)
from vllm.multimodal import NestedTensors


def _merge_multimodal_embeddings(
    inputs_embeds: torch.Tensor,
    is_multimodal: torch.Tensor,
    multimodal_embeddings: NestedTensors,
) -> torch.Tensor:
    """
    Merge ``multimodal_embeddings`` into ``inputs_embeds`` by overwriting the
    positions in ``inputs_embeds`` corresponding to placeholder tokens in
    ``input_ids``.

    Note:
        This updates ``inputs_embeds`` in place.
    """
    flattened = _flatten_embeddings(multimodal_embeddings)
    try:
        inputs_embeds[is_multimodal] = flattened
    except RuntimeError as e:
        num_expected_tokens = is_multimodal.sum().item()
        assert isinstance(num_expected_tokens, int)

        if flattened.shape[0] != num_expected_tokens:
            expr = _embedding_count_expression(multimodal_embeddings)
            raise ValueError(
                f"Attempted to assign {expr} = {flattened.shape[0]} "
                f"multimodal tokens to {num_expected_tokens} placeholders"
            ) from e
        else:
            raise ValueError("Error during masked scatter operation") from e

    return inputs_embeds


vllm.model_executor.models.utils._merge_multimodal_embeddings = _merge_multimodal_embeddings
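For reference, a small hypothetical usage of the patched helper (shapes and values are made up for illustration and assume the definitions in the deleted file above are in scope): three of five token positions are flagged as multimodal, and the flattened multimodal embeddings overwrite exactly those rows, in place.

```python
# Hypothetical example; relies on _merge_multimodal_embeddings defined above.
import torch

hidden_size = 8
inputs_embeds = torch.zeros(5, hidden_size)                    # 5 token positions
is_multimodal = torch.tensor([False, True, True, False, True])
multimodal_embeddings = [
    torch.ones(2, hidden_size),            # e.g. one image expands to 2 tokens
    torch.full((1, hidden_size), 2.0),     # a second item expands to 1 token
]

merged = _merge_multimodal_embeddings(inputs_embeds, is_multimodal,
                                      multimodal_embeddings)
# The placeholder rows are overwritten in place, so the same tensor is returned.
assert merged is inputs_embeds
assert torch.equal(merged[1], torch.ones(hidden_size))
```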