update and simplify CustomOp (#3249)

Author: Yineng Zhang (committed via GitHub)
Date: 2025-02-01 18:56:44 +08:00
Parent: 17dbf976c5
Commit: 4eb4b401cc
8 changed files with 46 additions and 45 deletions


@@ -25,21 +25,18 @@ from sglang.srt.utils import is_cuda_available
 if is_cuda_available():
     from sgl_kernel import gelu_and_mul, gelu_tanh_and_mul, silu_and_mul

-from vllm.model_executor.custom_op import CustomOp
+from sglang.srt.custom_op import CustomOp
 from sglang.srt.distributed import (
     divide,
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
 )
-from sglang.srt.layers.custom_op_util import register_custom_op
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.utils import set_weight_attrs

 logger = logging.getLogger(__name__)


-@register_custom_op("sglang_silu_and_mul")
 class SiluAndMul(CustomOp):
     def forward_native(self, x: torch.Tensor) -> torch.Tensor:
         d = x.shape[-1] // 2
@@ -53,7 +50,6 @@ class SiluAndMul(CustomOp):
         return out


-@register_custom_op("sglang_gelu_and_mul")
 class GeluAndMul(CustomOp):
     def __init__(self, approximate="tanh"):
         super().__init__()
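
For context, the change swaps vllm's registration-based CustomOp (and its @register_custom_op decorator) for a local base class in sglang.srt.custom_op. The sketch below is a minimal illustration of what such a simplified class could look like, assuming it subclasses torch.nn.Module and picks a backend-specific forward once at construction; it is not the exact sglang implementation, and forward_cuda is an assumed hook (only forward_native appears in the diff above).

    import torch
    from torch import nn


    class CustomOp(nn.Module):
        # Minimal sketch: resolve the backend implementation once, at init.
        def __init__(self):
            super().__init__()
            self._forward_method = self.dispatch_forward()

        def forward(self, *args, **kwargs):
            return self._forward_method(*args, **kwargs)

        def forward_native(self, *args, **kwargs):
            # Pure-PyTorch reference path; subclasses must provide it.
            raise NotImplementedError

        def forward_cuda(self, *args, **kwargs):
            # Assumed hook: fall back to the native path unless a subclass
            # overrides it with a fused kernel.
            return self.forward_native(*args, **kwargs)

        def dispatch_forward(self):
            return self.forward_cuda if torch.cuda.is_available() else self.forward_native

Under that assumption, a subclass like SiluAndMul only needs forward_native (shown in the diff) plus an optional forward_cuda that calls silu_and_mul from sgl_kernel, which is what lets the decorator-based registry be dropped.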