From 762623869560030990953b7d4292037527b36ec4 Mon Sep 17 00:00:00 2001
From: Chranos <826995883@qq.com>
Date: Wed, 11 Feb 2026 15:40:19 +0800
Subject: [PATCH] add deepseekv3 and llama4

---
 .../vllm_mlu/vllm_mlu/model_executor/layers/linear.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/layers/linear.py b/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/layers/linear.py
index 5a6f50d..6c7172f 100644
--- a/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/layers/linear.py
+++ b/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/layers/linear.py
@@ -26,9 +26,12 @@ def vllm__module_executor__layers__linear__UnquantizedLinearMethod__apply(
     beta = 1.0
     residual = residual.view(-1, residual.shape[-1])
     res_shape = x.shape[0:-1] + (layer.weight.shape[0], )
-    # MLU matmul requires matching dtypes; cast input to weight dtype
-    if x.dtype != layer.weight.dtype:
-        x = x.to(layer.weight.dtype)
+    # MLU matmul requires all tensors to have matching dtypes
+    target_dtype = layer.weight.dtype
+    if x.dtype != target_dtype:
+        x = x.to(target_dtype)
+    if residual is not None and residual.dtype != target_dtype:
+        residual = residual.to(target_dtype)
     return mlu_ops.matmul(x.view(-1, x.shape[-1]), layer.weight, bias,
                           residual, 'none', 1.0,
                           beta).view(res_shape)