forked from EngineX-Cambricon/enginex-mlu370-vllm
add deepseekv3 and llama4
This commit is contained in:
@@ -38,6 +38,9 @@ class UnquantizedEmbeddingMethod(QuantizeMethodBase):
|
|||||||
layer: torch.nn.Module,
|
layer: torch.nn.Module,
|
||||||
x: torch.Tensor,
|
x: torch.Tensor,
|
||||||
bias: Optional[torch.Tensor] = None) -> torch.Tensor:
|
bias: Optional[torch.Tensor] = None) -> torch.Tensor:
|
||||||
|
# MLU F.linear requires matching dtypes
|
||||||
|
if x.dtype != layer.weight.dtype:
|
||||||
|
x = x.to(layer.weight.dtype)
|
||||||
return F.linear(x, layer.weight, bias)
|
return F.linear(x, layer.weight, bias)
|
||||||
|
|
||||||
def embedding(self, layer: torch.nn.Module,
|
def embedding(self, layer: torch.nn.Module,
|
||||||
|
|||||||
Reference in New Issue
Block a user