Support NVFP4 quantized dense models on AMD CDNA2/CDNA3 GPUs (#7302)
Co-authored-by: HAI <hixiao@gmail.com>
Co-authored-by: Sai Enduri <saimanas.enduri@amd.com>
This commit is contained in:
@@ -391,6 +391,7 @@ class ModelConfig:
             "compressed-tensors",
             "fbgemm_fp8",
             "w8a8_fp8",
+            "petit_nvfp4",
         ]
         optimized_quantization_methods = [
             "fp8",
@@ -408,9 +409,11 @@ class ModelConfig:
             "moe_wna16",
             "qoq",
             "w4afp8",
+            "petit_nvfp4",
         ]
         compatible_quantization_methods = {
             "modelopt_fp4": ["modelopt"],
+            "petit_nvfp4": ["modelopt"],
             "w8a8_int8": ["compressed-tensors", "compressed_tensors"],
             "w8a8_fp8": ["compressed-tensors", "compressed_tensors"],
         }
Reference in New Issue
Block a user