Enable Nvidia's ModelOpt fp8 quantized models (#2535)
This commit is contained in:
@@ -150,6 +150,7 @@ class ModelRunner:
|
||||
"enable_nan_detection": server_args.enable_nan_detection,
|
||||
"enable_dp_attention": server_args.enable_dp_attention,
|
||||
"enable_ep_moe": server_args.enable_ep_moe,
|
||||
"modelopt_config": server_args.modelopt_config,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user