From a5ae07a5d2bb4701dbac4dc5b985e94b36404c33 Mon Sep 17 00:00:00 2001 From: Li Wang Date: Wed, 31 Dec 2025 09:49:55 +0800 Subject: [PATCH] [Bugfix] Fix mm_merge (#5249) ### What this PR does / why we need it? We should cast the mm_embed to the dtype of inputs_embeds before performing the in-place assignment - vLLM version: release/v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 Signed-off-by: wangli --- vllm_ascend/patch/worker/patch_multimodal_merge.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/patch/worker/patch_multimodal_merge.py b/vllm_ascend/patch/worker/patch_multimodal_merge.py index c8a1d5c3..f6301380 100644 --- a/vllm_ascend/patch/worker/patch_multimodal_merge.py +++ b/vllm_ascend/patch/worker/patch_multimodal_merge.py @@ -37,8 +37,9 @@ def _merge_multimodal_embeddings( This updates ``inputs_embeds`` in place. """ flattened = _flatten_embeddings(multimodal_embeddings) + input_dtype = inputs_embeds.dtype try: - inputs_embeds[is_multimodal] = flattened + inputs_embeds[is_multimodal] = flattened.to(dtype=input_dtype) except RuntimeError as e: num_expected_tokens = is_multimodal.sum().item() assert isinstance(num_expected_tokens, int)