From a5ae07a5d2bb4701dbac4dc5b985e94b36404c33 Mon Sep 17 00:00:00 2001 From: Li Wang Date: Wed, 31 Dec 2025 09:49:55 +0800 Subject: [PATCH] [Bugfix] Fix mm_merge (#5249) ### What this PR does / why we need it? We should cast the mm_embed to the dtype of inputs_embeds before performing the in-place assignment - vLLM version: release/v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 Signed-off-by: wangli --- vllm_ascend/patch/worker/patch_multimodal_merge.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/patch/worker/patch_multimodal_merge.py b/vllm_ascend/patch/worker/patch_multimodal_merge.py index c8a1d5c3..f6301380 100644 --- a/vllm_ascend/patch/worker/patch_multimodal_merge.py +++ b/vllm_ascend/patch/worker/patch_multimodal_merge.py @@ -37,8 +37,9 @@ def _merge_multimodal_embeddings( This updates ``inputs_embeds`` in place. """ flattened = _flatten_embeddings(multimodal_embeddings) + input_dtype = inputs_embeds.dtype try: - inputs_embeds[is_multimodal] = flattened + inputs_embeds[is_multimodal] = flattened.to(dtype=input_dtype) except RuntimeError as e: num_expected_tokens = is_multimodal.sum().item() assert isinstance(num_expected_tokens, int)