Support new DeepGEMM format in per token group quant (#7146)
This commit is contained in:
@@ -175,7 +175,8 @@ void sgl_per_token_group_quant_fp8(
 int64_t group_size,
 double eps,
 double fp8_min,
-double fp8_max);
+double fp8_max,
+bool scale_ue8m0);
 void sgl_per_token_group_quant_int8(
 at::Tensor input,
 at::Tensor output_q,
Reference in New Issue
Block a user