Add DeepSeek-V3 and Llama 4

This commit is contained in:
Chranos
2026-02-11 15:48:35 +08:00
parent e752946445
commit 6eae065dd6

View File

@@ -38,6 +38,9 @@ class UnquantizedEmbeddingMethod(QuantizeMethodBase):
layer: torch.nn.Module,
x: torch.Tensor,
bias: Optional[torch.Tensor] = None) -> torch.Tensor:
# F.linear on MLU requires the input and weight dtypes to match, so cast x to the weight's dtype first.
if x.dtype != layer.weight.dtype:
x = x.to(layer.weight.dtype)
return F.linear(x, layer.weight, bias)
def embedding(self, layer: torch.nn.Module,