[sgl-kernel] Add CUDA kernel for moe_ep_silu_and_mul (#6919)
Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
This commit is contained in:
@@ -52,6 +52,7 @@ from sgl_kernel.moe import (
|
||||
cutlass_fp4_group_mm,
|
||||
ep_moe_post_reorder,
|
||||
ep_moe_pre_reorder,
|
||||
ep_moe_silu_and_mul,
|
||||
fp8_blockwise_scaled_grouped_mm,
|
||||
moe_align_block_size,
|
||||
moe_fused_gate,
|
||||
|
||||
@@ -88,6 +88,24 @@ def ep_moe_pre_reorder(
|
||||
)
|
||||
|
||||
|
||||
def ep_moe_silu_and_mul(
    gateup_output,
    down_input,
    reorder_topk_ids,
    scales,
    start_expert_id,
    end_expert_id,
):
    """Dispatch to the ``sgl_kernel`` custom op ``ep_moe_silu_and_mul``.

    All six arguments are forwarded positionally, unchanged and in the
    order they are declared, to
    ``torch.ops.sgl_kernel.ep_moe_silu_and_mul.default``; whatever that op
    returns is returned to the caller.

    NOTE(review): judging by the names only, this presumably applies a
    SiLU-and-multiply activation to ``gateup_output`` and writes into
    ``down_input`` for experts between ``start_expert_id`` and
    ``end_expert_id`` -- confirm against the kernel implementation. The
    expected tensor shapes and dtypes are not visible from this wrapper.
    """
    # Resolve the registered op overload once, then invoke it.
    kernel = torch.ops.sgl_kernel.ep_moe_silu_and_mul.default
    return kernel(
        gateup_output,
        down_input,
        reorder_topk_ids,
        scales,
        start_expert_id,
        end_expert_id,
    )
|
||||
|
||||
|
||||
def ep_moe_post_reorder(
|
||||
down_output,
|
||||
output,
|
||||
|
||||
Reference in New Issue
Block a user