[sgl-kernel] Add cuda kernel for moe_ep_silu_and_mul (#6919)

Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
This commit is contained in:
Yuan Luo
2025-06-12 11:43:08 +08:00
committed by GitHub
parent ef32677444
commit 84727a5139
8 changed files with 381 additions and 0 deletions

View File

@@ -52,6 +52,7 @@ from sgl_kernel.moe import (
cutlass_fp4_group_mm,
ep_moe_post_reorder,
ep_moe_pre_reorder,
ep_moe_silu_and_mul,
fp8_blockwise_scaled_grouped_mm,
moe_align_block_size,
moe_fused_gate,

View File

@@ -88,6 +88,24 @@ def ep_moe_pre_reorder(
)
def ep_moe_silu_and_mul(
    gateup_output,
    down_input,
    reorder_topk_ids,
    scales,
    start_expert_id,
    end_expert_id,
):
    """Invoke the fused SiLU-and-mul CUDA kernel for expert-parallel MoE.

    Thin wrapper: forwards every argument, unchanged and in order, to the
    custom op registered under ``torch.ops.sgl_kernel``. All argument
    semantics (tensor shapes, dtypes, expert-id range) are defined by the
    underlying kernel — TODO confirm against the C++/CUDA registration.
    """
    # Resolve the registered overload once, then dispatch to it.
    kernel = torch.ops.sgl_kernel.ep_moe_silu_and_mul.default
    return kernel(
        gateup_output,
        down_input,
        reorder_topk_ids,
        scales,
        start_expert_id,
        end_expert_id,
    )
def ep_moe_post_reorder(
down_output,
output,