fix: remove cublas_grouped_gemm (#5307)

This commit is contained in:
Yineng Zhang
2025-04-11 16:22:37 -07:00
committed by GitHub
parent 034c5256cc
commit 136b8e6afb
8 changed files with 0 additions and 508 deletions

View File

@@ -160,13 +160,6 @@ void sgl_per_token_group_quant_int8(
double int8_max);
// Declaration only; the definition is not visible here.
// Takes three tensors (`input`, `output_q`, `output_s`) by value and a boolean `is_static`; returns nothing.
// NOTE(review): presumably quantizes `input` to FP8 with a single per-tensor scale, writing the
// quantized values to `output_q` and the scale to `output_s` — inferred from the names only; confirm
// against the implementation, including what `is_static` selects.
void sgl_per_tensor_quant_fp8(at::Tensor input, at::Tensor output_q, at::Tensor output_s, bool is_static);
// Declaration only; the definition is not visible here.
// Takes three tensors (`input`, `output_q`, `output_s`) by value; returns nothing.
// NOTE(review): presumably quantizes `input` to FP8 with one scale per token, writing the quantized
// values to `output_q` and the scales to `output_s` — inferred from the names only; confirm against
// the implementation.
void sgl_per_token_quant_fp8(at::Tensor input, at::Tensor output_q, at::Tensor output_s);
// Declaration only; the definition is not visible here.
// Takes three vectors of tensors (`inputs`, `weights`, `outputs`) by const reference, an output dtype,
// and two 64-bit integers named `cublas_handle` and `cuda_stream`; returns nothing.
// NOTE(review): the two integers presumably carry an opaque cuBLAS handle and CUDA stream passed as
// integer values — confirm against the implementation and its callers.
// NOTE(review): the commit title states that cublas_grouped_gemm is being removed; this declaration
// appears to be among the lines deleted by that change — verify before relying on it.
void cublas_grouped_gemm(
const std::vector<torch::Tensor>& inputs,
const std::vector<torch::Tensor>& weights,
const std::vector<torch::Tensor>& outputs,
const torch::Dtype& out_dtype,
int64_t cublas_handle,
int64_t cuda_stream);
void bmm_fp8(
at::Tensor A,
at::Tensor B,