FP4 weight loading and inference (2/2) (#3972)

2025-04-08 17:26:21 -07:00
parent 5039d54772
commit 11d760d56a
6 changed files with 262 additions and 1 deletion
--- a/python/sglang/srt/configs/model_config.py
+++ b/python/sglang/srt/configs/model_config.py
@@ -279,6 +279,7 @@ class ModelConfig:
            "moe_wna16",
        ]
        compatible_quantization_methods = {
+            "modelopt_fp4": ["modelopt"],
            "w8a8_int8": ["compressed-tensors", "compressed_tensors"],
            "w8a8_fp8": ["compressed-tensors", "compressed_tensors"],
        }