fix: support gelu_new activation function in gpt2 (#3712)
@@ -14,6 +14,7 @@
 """Fused operators for activation layers."""
 
 import logging
+import math
 from typing import Optional
 
 import torch
@@ -72,6 +73,16 @@ class GeluAndMul(CustomOp):
         return out
 
 
+class NewGELU(CustomOp):
+    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
+        c = math.sqrt(2.0 / math.pi)
+        return 0.5 * x * (1.0 + torch.tanh(c * (x + 0.044715 * torch.pow(x, 3.0))))
+
+    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
+        # TODO: Implement the CUDA kernel for NewGELU in sgl-kernel
+        return self.forward_native(x)
+
+
 class QuickGELU(CustomOp):
     def forward_native(self, x: torch.Tensor) -> torch.Tensor:
         return x * torch.sigmoid(1.702 * x)
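For context, gelu_new is the tanh-based GELU approximation used by the original GPT-2 implementation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))). Below is a minimal sanity-check sketch, not part of this commit: PyTorch's built-in F.gelu(x, approximate="tanh") computes the same formula, so the new op can be validated against it. The helper name new_gelu_reference is ours, introduced only for illustration.

# Hypothetical sanity check, not part of the commit: PyTorch's built-in
# tanh-approximate GELU implements the same gelu_new formula, so the two
# should agree to floating-point tolerance.
import math

import torch
import torch.nn.functional as F


def new_gelu_reference(x: torch.Tensor) -> torch.Tensor:
    # Same tanh-based approximation as NewGELU.forward_native above.
    c = math.sqrt(2.0 / math.pi)
    return 0.5 * x * (1.0 + torch.tanh(c * (x + 0.044715 * torch.pow(x, 3.0))))


x = torch.randn(4, 8)
torch.testing.assert_close(new_gelu_reference(x), F.gelu(x, approximate="tanh"))

Note that forward_cuda is a thin wrapper over forward_native for now, so the op is numerically identical on CPU and GPU until a fused sgl-kernel implementation lands, per the TODO in the diff.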