From a69129d5b5fa9f0734f9af66568d1f74bb6dcdf8 Mon Sep 17 00:00:00 2001 From: Chranos <826995883@qq.com> Date: Wed, 11 Feb 2026 15:32:07 +0800 Subject: [PATCH] add deepseekv3 and llama4 --- vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/layers/linear.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/layers/linear.py b/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/layers/linear.py index 8681cd6..ad7120e 100644 --- a/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/layers/linear.py +++ b/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/layers/linear.py @@ -26,6 +26,9 @@ def vllm__module_executor__layers__linear__UnquantizedLinearMethod__apply( beta = 1.0 residual = residual.view(-1, residual.shape[-1]) res_shape = x.shape[0:-1] + (layer.weight.shape[0], ) + if x.dtype != layer.weight.dtype: + logger.error("[DEBUG-DTYPE] matmul dtype mismatch: x.dtype=%s, weight.dtype=%s, x.shape=%s, weight.shape=%s, layer=%s", + x.dtype, layer.weight.dtype, x.shape, layer.weight.shape, type(layer).__name__) return mlu_ops.matmul(x.view(-1, x.shape[-1]), layer.weight, bias, residual, 'none', 1.0, beta).view(res_shape)