[EP] Add cuda kernel for moe_ep_post_reorder (#6837)
Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
This commit is contained in:
@@ -264,6 +264,16 @@ void ep_moe_pre_reorder(
|
||||
int64_t topk,
|
||||
bool use_per_token_if_dynamic);
|
||||
|
||||
// Prototype for ep_moe_post_reorder; only the declaration is visible here,
// the definition lives elsewhere.
//
// Takes five torch::Tensor arguments by value (down_output, output, src2dst,
// topk_ids, topk_weights) followed by three int64_t scalars (start_expert_id,
// end_expert_id, topk) and returns void.
//
// NOTE(review): since nothing is returned, the result is presumably written
// into one of the tensor arguments (most likely `output`) -- confirm against
// the implementation.
// NOTE(review): the names suggest `src2dst` is an index map, `topk_ids` /
// `topk_weights` are per-token expert selections and their weights, and
// [start_expert_id, end_expert_id] bounds the experts handled locally; the
// shapes, dtypes and whether the end bound is inclusive are not shown here --
// TODO confirm.
void ep_moe_post_reorder(
|
||||
torch::Tensor down_output,
|
||||
torch::Tensor output,
|
||||
torch::Tensor src2dst,
|
||||
torch::Tensor topk_ids,
|
||||
torch::Tensor topk_weights,
|
||||
int64_t start_expert_id,
|
||||
int64_t end_expert_id,
|
||||
int64_t topk);
|
||||
|
||||
// Prototype for shuffle_rows; only the declaration is visible here.
// `input_tensor` and `dst2src_map` are taken by const reference and
// `output_tensor` by non-const reference; the function returns void.
// NOTE(review): presumably fills `output_tensor` with rows of `input_tensor`
// selected via `dst2src_map` (destination row -> source row) -- confirm
// against the implementation, including expected shapes and dtypes.
void shuffle_rows(const torch::Tensor& input_tensor, const torch::Tensor& dst2src_map, torch::Tensor& output_tensor);
|
||||
|
||||
void cutlass_fp4_group_mm(
|
||||
|
||||
Reference in New Issue
Block a user