Support OCP MXFP4 quantization on AMD GPUs (#8255)

Co-authored-by: wunhuang <wunhuang@amd.com>
Co-authored-by: Hubert Lu <Hubert.Lu@amd.com>
2025-08-05 09:14:52 +08:00
parent 7cb20754fa
commit d4bf5a8524
12 changed files with 1159 additions and 1 deletions
--- a/python/sglang/srt/configs/model_config.py
+++ b/python/sglang/srt/configs/model_config.py
@@ -401,6 +401,8 @@ class ModelConfig:
            "fbgemm_fp8",
            "w8a8_fp8",
            "petit_nvfp4",
+            "quark",
+            "mxfp4",
        ]
        optimized_quantization_methods = [
            "fp8",