fix: support gelu_new activation function in gpt2 (#3712)

This commit is contained in:
Xiuyu Li
2025-03-04 04:09:52 -08:00
committed by GitHub
parent 37373ef2bb
commit 9545bfb28a
2 changed files with 24 additions and 7 deletions

View File

@@ -14,6 +14,7 @@
"""Fused operators for activation layers."""
import logging
import math
from typing import Optional
import torch
@@ -72,6 +73,16 @@ class GeluAndMul(CustomOp):
return out
class NewGELU(CustomOp):
    """GELU with the tanh approximation ("gelu_new"), as used by GPT-2.

    Computes 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))).
    """

    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
        """Pure-PyTorch reference implementation of gelu_new."""
        inner = math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))
        return 0.5 * x * (1.0 + torch.tanh(inner))

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        """CUDA path; falls back to the native implementation for now.

        TODO: Implement the CUDA kernel for NewGELU in sgl-kernel.
        """
        return self.forward_native(x)
class QuickGELU(CustomOp):
def forward_native(self, x: torch.Tensor) -> torch.Tensor:
    """Sigmoid-based GELU approximation: x * sigmoid(1.702 * x)."""
    gate = torch.sigmoid(1.702 * x)
    return x * gate