FP4 weight loading and inference (2/2) (#3972)

This commit is contained in:
Trevor Morris
2025-04-08 17:26:21 -07:00
committed by GitHub
parent 5039d54772
commit 11d760d56a
6 changed files with 262 additions and 1 deletion

View File

@@ -279,6 +279,7 @@ class ModelConfig:
"moe_wna16",
]
compatible_quantization_methods = {
"modelopt_fp4": ["modelopt"],
"w8a8_int8": ["compressed-tensors", "compressed_tensors"],
"w8a8_fp8": ["compressed-tensors", "compressed_tensors"],
}