Apply sgl w8a8 fp8 kernel (#3148)

This commit is contained in:
HandH1998
2025-03-09 16:03:32 +08:00
committed by GitHub
parent 9fb48f951f
commit 0dd6cda288
13 changed files with 523 additions and 37 deletions

View File

@@ -250,9 +250,11 @@ class ModelConfig:
 "compressed-tensors",
 "experts_int8",
 "w8a8_int8",
+"w8a8_fp8",
 ]
 compatible_quantization_methods = {
-"w8a8_int8": ["compressed-tensors", "compressed_tensors"]
+"w8a8_int8": ["compressed-tensors", "compressed_tensors"],
+"w8a8_fp8": ["compressed-tensors", "compressed_tensors"],
 }
 if self.quantization is not None:
 self.quantization = self.quantization.lower()