From e0bd67be538ee84273b4b4f78465d42215641fc4 Mon Sep 17 00:00:00 2001
From: Chranos <826995883@qq.com>
Date: Wed, 11 Feb 2026 15:48:35 +0800
Subject: [PATCH] add deepseekv3 and llama4

---
 .../vllm/model_executor/layers/vocab_parallel_embedding.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm-v0.6.2/vllm/model_executor/layers/vocab_parallel_embedding.py b/vllm-v0.6.2/vllm/model_executor/layers/vocab_parallel_embedding.py
index 52771f5..776ec33 100644
--- a/vllm-v0.6.2/vllm/model_executor/layers/vocab_parallel_embedding.py
+++ b/vllm-v0.6.2/vllm/model_executor/layers/vocab_parallel_embedding.py
@@ -38,6 +38,9 @@ class UnquantizedEmbeddingMethod(QuantizeMethodBase):
               layer: torch.nn.Module,
               x: torch.Tensor,
               bias: Optional[torch.Tensor] = None) -> torch.Tensor:
+        # MLU F.linear requires matching dtypes
+        if x.dtype != layer.weight.dtype:
+            x = x.to(layer.weight.dtype)
         return F.linear(x, layer.weight, bias)
 
     def embedding(self, layer: torch.nn.Module,