Sgl kernel fused_moe_gate support n_shared_experts (#5440)
This commit is contained in:
@@ -200,8 +200,14 @@ void topk_softmax(
|
||||
torch::Tensor& token_expert_indices,
|
||||
torch::Tensor& gating_output);
|
||||
|
||||
std::vector<at::Tensor>
|
||||
moe_fused_gate(at::Tensor& input, at::Tensor& bias, int64_t num_expert_group, int64_t topk_group, int64_t topk);
|
||||
std::vector<at::Tensor> moe_fused_gate(
|
||||
at::Tensor& input,
|
||||
at::Tensor& bias,
|
||||
int64_t num_expert_group,
|
||||
int64_t topk_group,
|
||||
int64_t topk,
|
||||
int64_t n_share_experts_fusion,
|
||||
double routed_scaling_factor);
|
||||
|
||||
/*
|
||||
* From csrc/speculative
|
||||
|
||||
Reference in New Issue
Block a user