Enable support for NVIDIA ModelOpt FP8-quantized models (#2535)

This commit is contained in:
Zhiyu
2025-01-06 14:54:52 -08:00
committed by GitHub
parent b8574f6953
commit 287427e2e6
5 changed files with 185 additions and 0 deletions

View File

@@ -150,6 +150,7 @@ class ModelRunner:
"enable_nan_detection": server_args.enable_nan_detection,
"enable_dp_attention": server_args.enable_dp_attention,
"enable_ep_moe": server_args.enable_ep_moe,
"modelopt_config": server_args.modelopt_config,
}
)