Support new DeepGEMM format in per token group quant (#7146)
This commit is contained in:
@@ -255,7 +255,10 @@ def sglang_per_token_group_quant_8bit(
|
||||
f8_info = torch.finfo(dtype)
|
||||
fp8_max = f8_info.max
|
||||
fp8_min = f8_info.min
|
||||
sgl_per_token_group_quant_fp8(x, x_q, x_s, group_size, eps, fp8_min, fp8_max)
|
||||
scale_ue8m0 = False # TODO: also test with scale_ue8m0=True
|
||||
sgl_per_token_group_quant_fp8(
|
||||
x, x_q, x_s, group_size, eps, fp8_min, fp8_max, scale_ue8m0
|
||||
)
|
||||
|
||||
return x_q, x_s
|
||||
|
||||
|
||||
Reference in New Issue
Block a user