Apply sgl w8a8 fp8 kernel (#3148)

This commit is contained in:
HandH1998
2025-03-09 16:03:32 +08:00
committed by GitHub
parent 9fb48f951f
commit 0dd6cda288
13 changed files with 523 additions and 37 deletions

View File

@@ -250,9 +250,11 @@ class ModelConfig:
 "compressed-tensors",
 "experts_int8",
 "w8a8_int8",
+"w8a8_fp8",
 ]
 compatible_quantization_methods = {
-"w8a8_int8": ["compressed-tensors", "compressed_tensors"]
+"w8a8_int8": ["compressed-tensors", "compressed_tensors"],
+"w8a8_fp8": ["compressed-tensors", "compressed_tensors"],
 }
 if self.quantization is not None:
 self.quantization = self.quantization.lower()