[refactor] slightly tidy fp8 module (#5993)

This commit is contained in:
JieXin Liang
2025-05-08 08:28:24 +08:00
committed by GitHub
parent e444c13fb4
commit b70957fcf8
12 changed files with 238 additions and 231 deletions

View File

@@ -10,16 +10,14 @@ import torch
from compressed_tensors import CompressionFormat
from compressed_tensors.quantization import QuantizationStrategy
-from sglang.srt.layers.quantization.fp8_kernel import scaled_fp8_quant
+from sglang.srt.layers.quantization.fp8_kernel import is_fp8_fnuz, scaled_fp8_quant
from sglang.srt.layers.quantization.fp8_utils import normalize_e4m3fn_to_e4m3fnuz
from sglang.srt.layers.quantization.utils import (
all_close_1d,
-    is_cuda,
-    is_fp8_fnuz,
per_tensor_dequantize,
replace_parameter,
)
-from sglang.srt.utils import set_weight_attrs
+from sglang.srt.utils import is_cuda, set_weight_attrs
_is_cuda = is_cuda()

View File

@@ -15,11 +15,12 @@ from sglang.srt.layers.parameter import (
from sglang.srt.layers.quantization.compressed_tensors.schemes import (
CompressedTensorsScheme,
)
+from sglang.srt.layers.quantization.fp8_kernel import is_fp8_fnuz
from sglang.srt.layers.quantization.fp8_utils import (
apply_fp8_linear,
normalize_e4m3fn_to_e4m3fnuz,
)
-from sglang.srt.layers.quantization.utils import is_fp8_fnuz, requantize_with_max_scale
+from sglang.srt.layers.quantization.utils import requantize_with_max_scale
__all__ = ["CompressedTensorsW8A8Fp8"]