[sgl-kernel][1/N] Support Expert Specialization Grouped GEMM (#11432)
Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com> Co-authored-by: PGFLMG <1106310035@qq.com> Co-authored-by: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com>
This commit is contained in:
@@ -821,3 +821,18 @@ void causal_conv1d_fwd(
|
||||
const std::optional<at::Tensor>& has_initial_state,
|
||||
bool silu_activation,
|
||||
int64_t pad_slot_id);
|
||||
|
||||
/*
|
||||
* From csrc/expert_specialization
|
||||
*/
|
||||
/*
 * Declaration only: the definition of es_fp8_blockwise_scaled_grouped_mm is
 * not visible in this file (the section comment above points at
 * csrc/expert_specialization).
 *
 * The function returns void. `output` is the only parameter taken by
 * non-const reference; every other tensor is passed by const reference.
 *
 * NOTE(review): the points below are inferred from names only and must be
 * confirmed against the implementation.
 *   - `output` is presumably the destination the results are written into.
 *   - `a` / `b` are presumably the FP8 operands, with `scales_a` / `scales_b`
 *     as their block-wise scale factors.
 *   - `stride_a` / `stride_b` / `stride_d` presumably carry per-group stride
 *     information for the two operands and the result.
 *   - `problem_sizes` and `expert_offsets` presumably describe the per-expert
 *     GEMM shapes and where each expert's rows begin.
 * Shapes, dtypes and device placement of all tensors are not established
 * here -- TODO confirm before relying on them.
 */
void es_fp8_blockwise_scaled_grouped_mm(
|
||||
torch::Tensor& output,
|
||||
const torch::Tensor& a,
|
||||
const torch::Tensor& b,
|
||||
const torch::Tensor& scales_a,
|
||||
const torch::Tensor& scales_b,
|
||||
const torch::Tensor& stride_a,
|
||||
const torch::Tensor& stride_b,
|
||||
const torch::Tensor& stride_d,
|
||||
const torch::Tensor& problem_sizes,
|
||||
const torch::Tensor& expert_offsets);
|
||||
|
||||
Reference in New Issue
Block a user