Urgent model support: support gemma-3-it (#4424)

This commit is contained in:
Mick
2025-03-17 08:37:32 +08:00
committed by GitHub
parent 402db5c58c
commit 9d02bb3e2a
21 changed files with 2565 additions and 85 deletions

View File

@@ -119,6 +119,26 @@ class GemmaRMSNorm(CustomOp):
return out
class Gemma3RMSNorm(nn.Module):
def __init__(self, dim: int, eps: float = 1e-6):
super().__init__()
self.eps = eps
self.weight = nn.Parameter(torch.zeros(dim))
def _norm(self, x):
return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)
def forward(self, x):
output = self._norm(x.float())
# Llama does x.to(float16) * w whilst Gemma3 is (x * w).to(float16)
# See https://github.com/huggingface/transformers/pull/29402
output = output * (1.0 + self.weight.float())
return output.type_as(x)
def extra_repr(self):
return f"{tuple(self.weight.shape)}, eps={self.eps}"
if not _is_cuda:
logger.info(
"sgl-kernel is not available on Non-NV platforms. Fallback to other kernel libraries."