Sgl kernel fused_moe_gate support n_shared_experts (#5440)

This commit is contained in:
Xiaoyu Zhang
2025-04-18 14:05:15 +08:00
committed by GitHub
parent 53dcf38876
commit 8e09b37077
5 changed files with 140 additions and 38 deletions

View File

@@ -200,8 +200,14 @@ void topk_softmax(
torch::Tensor& token_expert_indices,
torch::Tensor& gating_output);
// NOTE(review): this is the pre-change 5-argument declaration of moe_fused_gate
// (the removed side of this commit's diff, rendered without the '-' marker);
// the commit replaces it with the extended 7-argument form declared below.
std::vector<at::Tensor>
moe_fused_gate(at::Tensor& input, at::Tensor& bias, int64_t num_expert_group, int64_t topk_group, int64_t topk);
// Fused MoE gating kernel — declaration only; the definition lives in the CUDA
// kernel sources, so parameter semantics below are inferred from names and the
// commit message ("support n_shared_experts") and should be confirmed there.
//
// @param input   gating logits tensor — shape/dtype not visible here, TODO confirm
// @param bias    per-expert gating bias — presumably added before top-k selection; verify in kernel
// @param num_expert_group  number of expert groups for group-limited routing — assumption from name
// @param topk_group        number of groups kept during group-limited top-k — assumption from name
// @param topk              number of experts selected per token — assumption from name
// @param n_share_experts_fusion  new in this commit: count of shared experts fused
//                                into the gate output — NOTE(review): confirm against kernel
// @param routed_scaling_factor   new in this commit: scaling applied to routed
//                                expert weights — NOTE(review): confirm semantics
// @return vector of tensors — presumably {topk_weights, topk_ids}; verify against callers
std::vector<at::Tensor> moe_fused_gate(
    at::Tensor& input,
    at::Tensor& bias,
    int64_t num_expert_group,
    int64_t topk_group,
    int64_t topk,
    int64_t n_share_experts_fusion,
    double routed_scaling_factor);
/*
* From csrc/speculative