[AMD] Add silu_and_mul, gelu_and_mul, gelu_tanh_and_mul, and gelu_quick kernels for AMD GPUs (#7135)

Co-authored-by: yiakwy-xpu-ml-framework-team <961186938@qq.com>
Co-authored-by: HAI <hixiao@gmail.com>
This commit is contained in:
Hubert Lu
2025-07-24 23:44:28 -07:00
committed by GitHub
parent 7ad6b766c5
commit af4b9bae95
17 changed files with 1226 additions and 61 deletions

View File

@@ -36,16 +36,18 @@ def _get_version():
# Namespace string; interpolated into the -DOPERATOR_NAMESPACE define in hipcc_flags.
operator_namespace = "sgl_kernel"
# Directories built relative to `root`.
# Presumably handed to the extension as header search paths — confirm at the use site.
include_dirs = [
root / "include",
root / "include" / "impl",
root / "csrc",
]
# Source file paths (.hip, .cu and .cc), presumably the extension's compile inputs.
# NOTE(review): "csrc/torch_extension_rocm.cc" is listed twice below. This looks like
# a diff-view artifact (old and new position of one moved entry with the +/- markers
# lost) — confirm against the real file and keep only one occurrence.
sources = [
"csrc/allreduce/custom_all_reduce.hip",
"csrc/allreduce/quick_all_reduce.cu",
"csrc/elementwise/activation.cu",
"csrc/moe/moe_align_kernel.cu",
"csrc/moe/moe_topk_softmax_kernels.cu",
"csrc/torch_extension_rocm.cc",
"csrc/speculative/eagle_utils.cu",
"csrc/torch_extension_rocm.cc",
]
# Presumably the host C++ compiler flags (optimization level only) — confirm at the use site.
cxx_flags = ["-O3"]
@@ -69,6 +71,7 @@ if amdgpu_target not in ["gfx942", "gfx950"]:
)
sys.exit(1)
hipcc_flags = [
"-DNDEBUG",
f"-DOPERATOR_NAMESPACE={operator_namespace}",