[EP] Add CUDA kernel for moe_ep_pre_reorder (#6699)

Author: Yuan Luo
Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
Date: 2025-06-02 11:49:01 +08:00
Committed by: GitHub
Parent: 20fd53b8f6
Commit: 55444ed667
7 changed files with 230 additions and 0 deletions


@@ -46,6 +46,7 @@ from sgl_kernel.gemm import (
 )
 from sgl_kernel.grammar import apply_token_bitmask_inplace_cuda
 from sgl_kernel.moe import (
+    ep_moe_pre_reorder,
     fp8_blockwise_scaled_grouped_mm,
     moe_align_block_size,
     moe_fused_gate,


@@ -62,6 +62,30 @@ def moe_fused_gate(
     )
 
 
+def ep_moe_pre_reorder(
+    input_tensor,
+    gateup_input,
+    src2dst,
+    topk_ids,
+    a1_scales,
+    start_expert_id,
+    end_expert_id,
+    topk,
+    use_per_token_if_dynamic,
+):
+    return torch.ops.sgl_kernel.ep_moe_pre_reorder.default(
+        input_tensor,
+        gateup_input,
+        src2dst,
+        topk_ids,
+        a1_scales,
+        start_expert_id,
+        end_expert_id,
+        topk,
+        use_per_token_if_dynamic,
+    )
+
+
 def fp8_blockwise_scaled_grouped_mm(
     output,
     a_ptrs,
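
For readers unfamiliar with the EP-MoE pre-reorder step, the sketch below outlines what the new binding is expected to compute, modeled on the Triton pre-reorder path that this CUDA kernel accelerates: each token's hidden states are scattered into gateup_input at the destination rows given by src2dst, but only for top-k slots whose expert id falls inside this rank's [start_expert_id, end_expert_id] range, with optional a1_scales applied before the quantized grouped GEMM. The reference name ep_moe_pre_reorder_ref and the exact scaling convention are assumptions for illustration, not part of this commit.

import torch

def ep_moe_pre_reorder_ref(
    input_tensor: torch.Tensor,   # [num_tokens, hidden_size]
    gateup_input: torch.Tensor,   # [num_tokens * topk, hidden_size], written in place
    src2dst: torch.Tensor,        # [num_tokens, topk] destination row per (token, slot)
    topk_ids: torch.Tensor,       # [num_tokens, topk] expert id per (token, slot)
    a1_scales,                    # optional activation scales, or None
    start_expert_id: int,
    end_expert_id: int,
    topk: int,
    use_per_token_if_dynamic: bool,
) -> None:
    """Hypothetical pure-PyTorch reference; the CUDA kernel parallelizes these loops."""
    num_tokens = input_tensor.shape[0]
    for i in range(num_tokens):
        for k in range(topk):
            expert_id = int(topk_ids[i, k])
            # Tokens routed to experts outside this EP rank's range are skipped;
            # the rank that owns the expert handles them instead.
            if expert_id < start_expert_id or expert_id > end_expert_id:
                continue
            row = input_tensor[i].float()
            if a1_scales is not None:
                # Assumed scaling convention: multiply by the reciprocal of the
                # activation scale, choosing per-token or per-expert scales.
                if use_per_token_if_dynamic:
                    row = row * (1.0 / a1_scales[i])
                else:
                    row = row * (1.0 / a1_scales[expert_id - start_expert_id])
            gateup_input[int(src2dst[i, k])] = row.to(gateup_input.dtype)

A one-thread-per-token CUDA grid over this doubly nested loop is the natural parallelization, since each (token, slot) pair writes a disjoint row of gateup_input.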