Apply sgl w8a8 fp8 kernel (#3148)
This commit is contained in:
@@ -250,9 +250,11 @@ class ModelConfig:
     "compressed-tensors",
     "experts_int8",
     "w8a8_int8",
+    "w8a8_fp8",
 ]
 compatible_quantization_methods = {
-    "w8a8_int8": ["compressed-tensors", "compressed_tensors"]
+    "w8a8_int8": ["compressed-tensors", "compressed_tensors"],
+    "w8a8_fp8": ["compressed-tensors", "compressed_tensors"],
 }
 if self.quantization is not None:
     self.quantization = self.quantization.lower()
Reference in New Issue
Block a user