Add moe topk softmax templated from vllm (#4302)
This commit is contained in:
@@ -173,6 +173,12 @@ void moe_align_block_size(
|
||||
torch::Tensor token_cnts_buffer,
|
||||
torch::Tensor cumsum_buffer);
|
||||
|
||||
// Forward declaration of topk_softmax: returns void and takes four
// torch::Tensor parameters, each by non-const lvalue reference. The definition
// is not part of this view.
// NOTE(review): presumably topk_weights, topk_indices and token_expert_indices
// are written by the call and gating_output is only read -- confirm against
// the implementation before relying on it.
// NOTE(review): the `|` / `||||` lines interleaved with the parameters look
// like residue from a rendered diff view rather than C++ source -- confirm.
void topk_softmax(
|
||||
torch::Tensor& topk_weights,
|
||||
torch::Tensor& topk_indices,
|
||||
torch::Tensor& token_expert_indices,
|
||||
torch::Tensor& gating_output);
|
||||
|
||||
/*
|
||||
* From csrc/speculative
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user