From c32fb7a24d26d4a9654d9d4fa407bc01ca3dbba6 Mon Sep 17 00:00:00 2001
From: sogalin <39478626+sogalin@users.noreply.github.com>
Date: Thu, 18 Sep 2025 08:44:59 +0800
Subject: [PATCH] [ROCm] Fix fp8 quantization accuracy issue. (#10558)

---
 python/sglang/srt/layers/quantization/fp8_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/quantization/fp8_utils.py b/python/sglang/srt/layers/quantization/fp8_utils.py
index b09b80907..3066842f0 100644
--- a/python/sglang/srt/layers/quantization/fp8_utils.py
+++ b/python/sglang/srt/layers/quantization/fp8_utils.py
@@ -732,7 +732,7 @@ def apply_fp8_linear(
                 # final solution should be: 1. add support to per-tensor activation scaling.
                 # 2. solve the torch.compile error from weight_scale.numel() == 1 and x_scale.numel() > 1 (below line#308)
                 if _is_hip and weight_scale.numel() == 1:
-                    qinput, x_scale = ops.scaled_fp8_quant(
+                    qinput, x_scale = scaled_fp8_quant(
                         input_2d,
                         input_scale,
                         use_per_token_if_dynamic=use_per_token_if_dynamic,