[NVIDIA] [2/N] Optimize silu_and_mul_scaled_fp4_grouped_quant perf (#9556)
This commit is contained in:
@@ -394,9 +394,8 @@ void silu_and_mul_scaled_fp4_experts_quant(
|
||||
torch::Tensor& output_scale,
|
||||
torch::Tensor const& input,
|
||||
torch::Tensor const& input_global_scale,
|
||||
torch::Tensor const& input_offset_by_experts,
|
||||
torch::Tensor const& output_scale_offset_by_experts,
|
||||
torch::Tensor const& mask);
|
||||
torch::Tensor const& mask,
|
||||
bool use_silu_and_mul);
|
||||
/*
|
||||
* From csrc/moe/cutlass_moe/w4a8
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user