[refactor] slightly tidy fp8 module (#5993)

This commit is contained in:
JieXin Liang
2025-05-08 08:28:24 +08:00
committed by GitHub
parent e444c13fb4
commit b70957fcf8
12 changed files with 238 additions and 231 deletions

View File

@@ -10,16 +10,14 @@ import torch
from compressed_tensors import CompressionFormat
from compressed_tensors.quantization import QuantizationStrategy
-from sglang.srt.layers.quantization.fp8_kernel import scaled_fp8_quant
+from sglang.srt.layers.quantization.fp8_kernel import is_fp8_fnuz, scaled_fp8_quant
from sglang.srt.layers.quantization.fp8_utils import normalize_e4m3fn_to_e4m3fnuz
from sglang.srt.layers.quantization.utils import (
all_close_1d,
-    is_cuda,
-    is_fp8_fnuz,
per_tensor_dequantize,
replace_parameter,
)
-from sglang.srt.utils import set_weight_attrs
+from sglang.srt.utils import is_cuda, set_weight_attrs
_is_cuda = is_cuda()

View File

@@ -15,11 +15,12 @@ from sglang.srt.layers.parameter import (
from sglang.srt.layers.quantization.compressed_tensors.schemes import (
CompressedTensorsScheme,
)
+from sglang.srt.layers.quantization.fp8_kernel import is_fp8_fnuz
from sglang.srt.layers.quantization.fp8_utils import (
apply_fp8_linear,
normalize_e4m3fn_to_e4m3fnuz,
)
-from sglang.srt.layers.quantization.utils import is_fp8_fnuz, requantize_with_max_scale
+from sglang.srt.layers.quantization.utils import requantize_with_max_scale
__all__ = ["CompressedTensorsW8A8Fp8"]