Add support for NCCL symmetric memory for TP allreduces (#8238)
This commit is contained in:
@@ -623,8 +623,9 @@ class Engine(EngineBase):
 def _set_envs_and_config(server_args: ServerArgs):
     # Set global environments
     os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
-    os.environ["NCCL_CUMEM_ENABLE"] = "0"
-    os.environ["NCCL_NVLS_ENABLE"] = str(int(server_args.enable_nccl_nvls))
+    os.environ["NCCL_CUMEM_ENABLE"] = str(int(server_args.enable_symm_mem))
+    if not server_args.enable_symm_mem:
+        os.environ["NCCL_NVLS_ENABLE"] = str(int(server_args.enable_nccl_nvls))
     os.environ["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1"
     os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "4"
     os.environ["CUDA_MODULE_LOADING"] = "AUTO"
Reference in New Issue
Block a user