fix: support gelu_new activation function in gpt2 (#3712)

This commit is contained in:
Xiuyu Li
2025-03-04 04:09:52 -08:00
committed by GitHub
parent 37373ef2bb
commit 9545bfb28a
2 changed files with 24 additions and 7 deletions

View File

@@ -14,6 +14,7 @@
"""Fused operators for activation layers."""
import logging
import math
from typing import Optional
import torch
@@ -72,6 +73,16 @@ class GeluAndMul(CustomOp):
return out
class NewGELU(CustomOp):
    """GELU with the tanh approximation ("gelu_new"), as used by GPT-2.

    Computes 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))).
    """

    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
        """Pure-PyTorch reference implementation of gelu_new."""
        inner = math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))
        return 0.5 * x * (1.0 + torch.tanh(inner))

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        """CUDA path; falls back to the native implementation for now.

        TODO: Implement the CUDA kernel for NewGELU in sgl-kernel.
        """
        return self.forward_native(x)
class QuickGELU(CustomOp):
def forward_native(self, x: torch.Tensor) -> torch.Tensor:
    """Sigmoid-based GELU approximation: x * sigmoid(1.702 * x)."""
    gate = torch.sigmoid(1.702 * x)
    return x * gate