From 819924748add1ffd8175c5cd6b2d6a009706d0cd Mon Sep 17 00:00:00 2001 From: HAI Date: Mon, 7 Apr 2025 00:34:08 -0700 Subject: [PATCH] Fix refactor error - fp8.py (#5106) Co-authored-by: Lianmin Zheng --- python/sglang/srt/layers/quantization/fp8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/layers/quantization/fp8.py b/python/sglang/srt/layers/quantization/fp8.py index f977d899b..328d82215 100644 --- a/python/sglang/srt/layers/quantization/fp8.py +++ b/python/sglang/srt/layers/quantization/fp8.py @@ -860,7 +860,7 @@ class Fp8MoEMethod: layer.w13_weight_scale1[expert_id] *= max_w13_scales[expert_id] layer.w2_weight_scale1[expert_id] *= layer.w2_weight_scale[expert_id] - def process_weights_hip_scale_padding(self, layer: Module, padding_size: int): + def process_weights_hip_scale_padding(self, layer: Module): from sglang.srt.layers.moe.fused_moe_triton.fused_moe import ( padding_size, # Avoid circular import )