From d860f71e4dad4046427dd27214f014af12bd1607 Mon Sep 17 00:00:00 2001 From: Chranos <826995883@qq.com> Date: Wed, 11 Feb 2026 15:44:44 +0800 Subject: [PATCH] layer_utils: cast input embeddings to model dtype for deepseekv3 and llama4 --- .../vllm_mlu/vllm_mlu/model_executor/models/layer_utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/layer_utils.py b/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/layer_utils.py index bb2ba73..1956cf9 100755 --- a/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/layer_utils.py +++ b/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/layer_utils.py @@ -194,6 +194,11 @@ def decoder_model_forward_base_pp( hidden_states = inputs_embeds else: hidden_states = get_input_embeddings(input_ids) + # MLU F.embedding may output float32 even with float16 weights; + # cast to model dtype to avoid dtype mismatches downstream. + target_dtype = next(layers[start_layer].parameters()).dtype + if hidden_states.dtype != target_dtype: + hidden_states = hidden_states.to(target_dtype) else: assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"]