Support new DeepGEMM format in per token group quant (#7146)
This commit is contained in:
@@ -116,7 +116,7 @@ TORCH_LIBRARY_FRAGMENT(sgl_kernel, m) {
|
||||
|
||||
m.def(
|
||||
"sgl_per_token_group_quant_fp8(Tensor input, Tensor output_q, Tensor output_s, int group_size,"
|
||||
" float eps, float fp8_min, float fp8_max) -> ()");
|
||||
" float eps, float fp8_min, float fp8_max, bool scale_ue8m0) -> ()");
|
||||
m.impl("sgl_per_token_group_quant_fp8", torch::kCUDA, &sgl_per_token_group_quant_fp8);
|
||||
|
||||
m.def(
|
||||
|
||||
Reference in New Issue
Block a user