Enable native ModelOpt quantization support (1/3) (#7149)
Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com>
python/sglang/srt/layers/modelopt_utils.py (new file, 11 lines)
@@ -0,0 +1,11 @@
+"""
+ModelOpt related constants
+"""
+
+QUANT_CFG_CHOICES = {
+    "fp8": "FP8_DEFAULT_CFG",
+    "int4_awq": "INT4_AWQ_CFG",  # TODO: add support for int4_awq
+    "w4a8_awq": "W4A8_AWQ_BETA_CFG",  # TODO: add support for w4a8_awq
+    "nvfp4": "NVFP4_DEFAULT_CFG",
+    "nvfp4_awq": "NVFP4_AWQ_LITE_CFG",  # TODO: add support for nvfp4_awq
+}
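
The values in QUANT_CFG_CHOICES are attribute names on NVIDIA's modelopt.torch.quantization module (e.g. mtq.FP8_DEFAULT_CFG), so a string choice can be resolved with getattr. A minimal sketch of that resolution, assuming nvidia-modelopt is installed; get_modelopt_cfg and calibrate_fn are illustrative names, not part of this commit:

import modelopt.torch.quantization as mtq

from sglang.srt.layers.modelopt_utils import QUANT_CFG_CHOICES


def get_modelopt_cfg(choice: str):
    # Map a user-facing choice ("fp8", "nvfp4", ...) to the ModelOpt
    # config object, e.g. "FP8_DEFAULT_CFG" -> mtq.FP8_DEFAULT_CFG.
    try:
        cfg_name = QUANT_CFG_CHOICES[choice]
    except KeyError:
        raise ValueError(
            f"Unsupported quantization {choice!r}; "
            f"expected one of {sorted(QUANT_CFG_CHOICES)}"
        )
    return getattr(mtq, cfg_name)


# Usage (model and calibrate_fn are placeholders):
#     cfg = get_modelopt_cfg("fp8")
#     model = mtq.quantize(model, cfg, forward_loop=calibrate_fn)
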
@@ -72,7 +72,7 @@ if TYPE_CHECKING:
 BASE_QUANTIZATION_METHODS: Dict[str, Type[QuantizationConfig]] = {
     "fp8": Fp8Config,
     "blockwise_int8": BlockInt8Config,
-    "modelopt": ModelOptFp8Config,
+    "modelopt_fp8": ModelOptFp8Config,
     "modelopt_fp4": ModelOptFp4Config,
     "w8a8_int8": W8A8Int8Config,
     "w8a8_fp8": W8A8Fp8Config,
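
The hunk above renames the registry key "modelopt" to "modelopt_fp8", making the FP8 entry symmetric with "modelopt_fp4". A toy lookup showing what callers see after the rename; resolve_quant_config is a hypothetical helper for illustration, not SGLang's actual API:

from typing import Dict, Type

def resolve_quant_config(name: str, registry: Dict[str, Type]) -> Type:
    # Hypothetical helper; SGLang's own lookup code is not shown in
    # this diff.
    if name not in registry:
        raise ValueError(
            f"Unknown quantization method {name!r}; "
            f"choices: {sorted(registry)}"
        )
    return registry[name]

# After this change:
#     resolve_quant_config("modelopt_fp8", BASE_QUANTIZATION_METHODS)
#         -> ModelOptFp8Config
#     resolve_quant_config("modelopt", BASE_QUANTIZATION_METHODS)
#         -> ValueError (the old key no longer exists)
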
@@ -113,7 +113,7 @@ class ModelOptFp8Config(QuantizationConfig):
 
     @classmethod
     def get_name(cls) -> str:
-        return "modelopt"
+        return "modelopt_fp8"
 
     @classmethod
     def get_supported_act_dtypes(cls) -> List[torch.dtype]:
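
get_name() is updated in lockstep with the registry key above; the two must agree for the method name to round-trip through the registry. A sketch of that invariant for the one pair visible in this diff:

# The name a config class reports must match its registry key; only the
# "modelopt_fp8" pair is visible in this diff.
assert ModelOptFp8Config.get_name() == "modelopt_fp8"
assert BASE_QUANTIZATION_METHODS["modelopt_fp8"] is ModelOptFp8Config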