From 762623869560030990953b7d4292037527b36ec4 Mon Sep 17 00:00:00 2001
From: Chranos <826995883@qq.com>
Date: Wed, 11 Feb 2026 15:40:19 +0800
Subject: [PATCH] add deepseekv3 and llama4

---
 .../vllm_mlu/vllm_mlu/model_executor/layers/linear.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/layers/linear.py b/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/layers/linear.py
index 5a6f50d..6c7172f 100644
--- a/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/layers/linear.py
+++ b/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/layers/linear.py
@@ -26,9 +26,12 @@ def vllm__module_executor__layers__linear__UnquantizedLinearMethod__apply(
     beta = 1.0
     residual = residual.view(-1, residual.shape[-1])
     res_shape = x.shape[0:-1] + (layer.weight.shape[0], )
-    # MLU matmul requires matching dtypes; cast input to weight dtype
-    if x.dtype != layer.weight.dtype:
-        x = x.to(layer.weight.dtype)
+    # MLU matmul requires all tensors to have matching dtypes
+    target_dtype = layer.weight.dtype
+    if x.dtype != target_dtype:
+        x = x.to(target_dtype)
+    if residual is not None and residual.dtype != target_dtype:
+        residual = residual.to(target_dtype)
     return mlu_ops.matmul(x.view(-1, x.shape[-1]), layer.weight, bias,
                           residual, 'none', 1.0,
                           beta).view(res_shape)