From 88d6fd9a11bf8ec6ffcf91fa4534aec9163695ef Mon Sep 17 00:00:00 2001 From: kk <43161300+kkHuang-amd@users.noreply.github.com> Date: Tue, 8 Apr 2025 23:04:37 +0800 Subject: [PATCH] Fix torch compile errors (#5158) --- python/sglang/srt/layers/quantization/fp8_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/layers/quantization/fp8_utils.py b/python/sglang/srt/layers/quantization/fp8_utils.py index 574dffd63..2038938ea 100644 --- a/python/sglang/srt/layers/quantization/fp8_utils.py +++ b/python/sglang/srt/layers/quantization/fp8_utils.py @@ -243,8 +243,8 @@ def apply_fp8_linear( if _is_cuda: qinput, x_scale = sglang_per_token_quant_fp8(input_2d) else: - qinput, x_scale = per_token_group_quant_fp8( - input_2d, group_size=input_2d.shape[1] + qinput, x_scale = ops.scaled_fp8_quant( + input_2d, input_scale, use_per_token_if_dynamic=use_per_token_if_dynamic ) if cutlass_fp8_supported: