Support using a higher sgl-kernel version that is not officially supported together with a lower srt version (#11786)
This commit is contained in:
@@ -265,6 +265,8 @@ from sgl_kernel.gemm import (
|
||||
scaled_fp4_quant,
|
||||
sgl_per_tensor_quant_fp8,
|
||||
sgl_per_token_group_quant_8bit,
|
||||
sgl_per_token_group_quant_fp8,
|
||||
sgl_per_token_group_quant_int8,
|
||||
sgl_per_token_quant_fp8,
|
||||
shuffle_rows,
|
||||
silu_and_mul_scaled_fp4_grouped_quant,
|
||||
|
||||
@@ -137,6 +137,11 @@ def sgl_per_token_group_quant_8bit(
|
||||
)
|
||||
|
||||
|
||||
# For legacy usage: keep the fp8/int8 names as aliases of sgl_per_token_group_quant_8bit
|
||||
sgl_per_token_group_quant_fp8 = sgl_per_token_group_quant_8bit
|
||||
sgl_per_token_group_quant_int8 = sgl_per_token_group_quant_8bit
|
||||
|
||||
|
||||
def sgl_per_tensor_quant_fp8(
|
||||
input: torch.Tensor,
|
||||
output_q: torch.Tensor,
|
||||
|
||||
Reference in New Issue
Block a user