[EP] Add cuda kernel for moe_ep_post_reorder (#6837)

Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
This commit is contained in:
Yuan Luo
2025-06-05 15:33:47 +08:00
committed by GitHub
parent 0166403c20
commit 43baba649e
7 changed files with 377 additions and 4 deletions

View File

@@ -264,6 +264,16 @@ void ep_moe_pre_reorder(
int64_t topk,
bool use_per_token_if_dynamic);
// Declares the expert-parallel (EP) MoE post-reorder entry point; the
// definition lives elsewhere. It takes five tensors and three 64-bit integers
// by value and returns nothing.
// NOTE(review): tensor shapes, dtypes and devices are not visible from this
// declaration. Presumably this is backed by a CUDA kernel and writes its
// result into `output` — TODO confirm against the definition.
void ep_moe_post_reorder(
torch::Tensor down_output,  // presumably the expert down-projection results to gather from — TODO confirm
torch::Tensor output,  // presumably the destination written by the op — TODO confirm
torch::Tensor src2dst,  // presumably a source-to-destination row index map — TODO confirm
torch::Tensor topk_ids,  // presumably the expert ids selected per token — TODO confirm
torch::Tensor topk_weights,  // presumably the routing weights matching topk_ids — TODO confirm
int64_t start_expert_id,  // presumably the first expert id handled here — TODO confirm
int64_t end_expert_id,  // NOTE(review): inclusive vs. exclusive bound is not visible here — confirm
int64_t topk);  // presumably the number of experts selected per token — TODO confirm
// Declares shuffle_rows; the definition lives elsewhere. `input_tensor` and
// `dst2src_map` are taken by const reference, `output_tensor` by mutable
// reference, and nothing is returned.
// NOTE(review): presumably output row i is copied from input row
// dst2src_map[i] — confirm against the definition.
void shuffle_rows(const torch::Tensor& input_tensor, const torch::Tensor& dst2src_map, torch::Tensor& output_tensor);
void cutlass_fp4_group_mm(