From d860f71e4dad4046427dd27214f014af12bd1607 Mon Sep 17 00:00:00 2001 From: Chranos <826995883@qq.com> Date: Wed, 11 Feb 2026 15:44:44 +0800 Subject: [PATCH] layer_utils: cast input embeddings to model dtype for deepseekv3 and llama4 --- .../vllm_mlu/vllm_mlu/model_executor/models/layer_utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/layer_utils.py b/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/layer_utils.py index bb2ba73..1956cf9 100755 --- a/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/layer_utils.py +++ b/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/layer_utils.py @@ -194,6 +194,11 @@ def decoder_model_forward_base_pp( hidden_states = inputs_embeds else: hidden_states = get_input_embeddings(input_ids) + # MLU F.embedding may output float32 even with float16 weights; + # cast to model dtype to avoid dtype mismatches downstream. + target_dtype = next(layers[start_layer].parameters()).dtype + if hidden_states.dtype != target_dtype: + hidden_states = hidden_states.to(target_dtype) else: assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"]