[chore] Remove unused ep_moe cuda kernels (#9956)

Author: hlu1
Date: 2025-09-06 01:35:50 -07:00
Committed by: GitHub
Parent: 039cef76aa
Commit: 5f1eb20484
13 changed files with 4 additions and 1110 deletions

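For reference, a minimal sketch of how the now-removed ops were invoked through the registered sgl_kernel namespace (torch.ops.sgl_kernel.*), based only on the schemas deleted below. Tensor shapes, dtypes, and the expert-id range are illustrative assumptions, not taken from the kernels' actual contracts:

import torch
import sgl_kernel  # loads the compiled extension that registered these ops (pre-removal)

num_tokens, topk, hidden = 8, 2, 128
hidden_states = torch.randn(num_tokens, hidden, device="cuda", dtype=torch.bfloat16)
gateup_input = torch.empty(num_tokens * topk, hidden, device="cuda", dtype=torch.bfloat16)
src2dst = torch.zeros(num_tokens, topk, device="cuda", dtype=torch.int32)
topk_ids = torch.zeros(num_tokens, topk, device="cuda", dtype=torch.int32)
a1_scales = torch.ones(num_tokens, device="cuda", dtype=torch.float32)

# Matches the removed schema:
# ep_moe_pre_reorder(input, gateup_input, src2dst, topk_ids, a1_scales,
#                    start_expert_id, end_expert_id, topk, use_per_token_if_dynamic) -> ()
torch.ops.sgl_kernel.ep_moe_pre_reorder(
    hidden_states, gateup_input, src2dst, topk_ids, a1_scales, 0, 7, topk, True
)

After this commit, torch.ops.sgl_kernel no longer exposes ep_moe_pre_reorder, ep_moe_silu_and_mul, or ep_moe_post_reorder.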

@@ -209,18 +209,6 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
"num_fused_shared_experts, float routed_scaling_factor, bool apply_routed_scaling_factor_on_output) -> "
"(Tensor[])");
m.impl("moe_fused_gate", torch::kCUDA, &moe_fused_gate);
m.def(
"ep_moe_pre_reorder(Tensor input, Tensor gateup_input, Tensor src2dst, Tensor topk_ids, Tensor "
"a1_scales, int start_expert_id, int end_expert_id, int topk, bool use_per_token_if_dynamic) -> ()");
m.impl("ep_moe_pre_reorder", torch::kCUDA, &ep_moe_pre_reorder);
m.def(
"ep_moe_silu_and_mul(Tensor gateup_output, Tensor down_input, Tensor reorder_topk_ids, Tensor scales, int "
"start_expert_id, int end_expert_id) -> ()");
m.impl("ep_moe_silu_and_mul", torch::kCUDA, &ep_moe_silu_and_mul);
m.def(
"ep_moe_post_reorder(Tensor down_output, Tensor output, Tensor src2dst, Tensor topk_ids, Tensor "
"topk_weights, int start_expert_id, int end_expert_id, int topk) -> ()");
m.impl("ep_moe_post_reorder", torch::kCUDA, &ep_moe_post_reorder);
m.def(
"fp8_blockwise_scaled_grouped_mm(Tensor output, Tensor a_ptrs, Tensor b_ptrs, Tensor out_ptrs, Tensor "
"a_scales_ptrs, Tensor b_scales_ptrs, Tensor a, Tensor b, Tensor scales_a, Tensor scales_b, Tensor "