Add DeepSeek-V3 and Llama 4

This commit is contained in:
Chranos
2026-02-11 15:24:13 +08:00
parent cba7ad6c59
commit 9b05d7285e
5 changed files with 320 additions and 9 deletions

View File

@@ -143,11 +143,14 @@ class RMSNorm(CustomOp):
         from vllm import _mlu_ops as mlu_ops
         x = x.view(-1, self.weight.data.shape[0])
+        weight = self.weight.data
+        if weight.dtype != x.dtype:
+            weight = weight.to(x.dtype)
         if residual is not None:
             residual = residual.view(-1, self.weight.data.shape[0])
-            return mlu_ops.fused_rms_norm(x, residual, self.weight.data, None, None, self.variance_epsilon, True)
+            return mlu_ops.fused_rms_norm(x, residual, weight, None, None, self.variance_epsilon, True)
         else:
-            return mlu_ops.fused_rms_norm(x, residual, self.weight.data, None, None, self.variance_epsilon, False)
+            return mlu_ops.fused_rms_norm(x, residual, weight, None, None, self.variance_epsilon, False)
     def extra_repr(self) -> str:
         s = f"hidden_size={self.weight.data.size(0)}"