Support using a higher sgl-kernel version with a lower srt version, a combination that is not officially supported (#11786)

This commit is contained in:
fzyzcjy
2025-10-19 16:11:59 +08:00
committed by GitHub
parent ce399e154c
commit a27825ae01
2 changed files with 7 additions and 0 deletions

View File

@@ -265,6 +265,8 @@ from sgl_kernel.gemm import (
scaled_fp4_quant,
sgl_per_tensor_quant_fp8,
sgl_per_token_group_quant_8bit,
sgl_per_token_group_quant_fp8,
sgl_per_token_group_quant_int8,
sgl_per_token_quant_fp8,
shuffle_rows,
silu_and_mul_scaled_fp4_grouped_quant,

View File

@@ -137,6 +137,11 @@ def sgl_per_token_group_quant_8bit(
)
# Legacy aliases: keep the older fp8- and int8-specific names available by
# binding both to sgl_per_token_group_quant_8bit (the same function object,
# not a wrapper).
# NOTE(review): presumably retained so callers that still import the old
# names keep working against this kernel version -- confirm against callers.
sgl_per_token_group_quant_fp8 = sgl_per_token_group_quant_8bit
sgl_per_token_group_quant_int8 = sgl_per_token_group_quant_8bit
def sgl_per_tensor_quant_fp8(
input: torch.Tensor,
output_q: torch.Tensor,