fix: remove cublas_grouped_gemm (#5307)

2025-04-11 16:22:37 -07:00
parent 034c5256cc
commit 136b8e6afb
8 changed files with 0 additions and 508 deletions
--- a/sgl-kernel/include/sgl_kernel_ops.h
+++ b/sgl-kernel/include/sgl_kernel_ops.h
@@ -160,13 +160,6 @@ void sgl_per_token_group_quant_int8(
    double int8_max);
 void sgl_per_tensor_quant_fp8(at::Tensor input, at::Tensor output_q, at::Tensor output_s, bool is_static);
 void sgl_per_token_quant_fp8(at::Tensor input, at::Tensor output_q, at::Tensor output_s);
-void cublas_grouped_gemm(
-    const std::vector<torch::Tensor>& inputs,
-    const std::vector<torch::Tensor>& weights,
-    const std::vector<torch::Tensor>& outputs,
-    const torch::Dtype& out_dtype,
-    int64_t cublas_handle,
-    int64_t cuda_stream);
 void bmm_fp8(
    at::Tensor A,
    at::Tensor B,