Enable support for auto-round quantized models (#6226)

Signed-off-by: Zhang, Weiwei1 <weiwei1.zhang@intel.com>
2025-09-08 13:05:35 +08:00
parent b67c277f86
commit c8295d2353
8 changed files with 528 additions and 0 deletions
--- a/python/sglang/srt/configs/model_config.py
+++ b/python/sglang/srt/configs/model_config.py
@@ -450,6 +450,7 @@ class ModelConfig:
            "petit_nvfp4",
            "quark",
            "mxfp4",
+            "auto-round",
        ]
        optimized_quantization_methods = [
            "fp8",