Make NCCL NVLS configurable (#3502)
This commit is contained in:
@@ -297,7 +297,7 @@ def _set_envs_and_config(server_args: ServerArgs):
|
|||||||
# Set global environments
|
# Set global environments
|
||||||
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
||||||
os.environ["NCCL_CUMEM_ENABLE"] = "0"
|
os.environ["NCCL_CUMEM_ENABLE"] = "0"
|
||||||
os.environ["NCCL_NVLS_ENABLE"] = "0"
|
os.environ["NCCL_NVLS_ENABLE"] = str(int(server_args.enable_nccl_nvls))
|
||||||
os.environ["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1"
|
os.environ["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1"
|
||||||
os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "4"
|
os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "4"
|
||||||
|
|
||||||
|
|||||||
@@ -140,6 +140,7 @@ class ServerArgs:
|
|||||||
disable_jump_forward: bool = False
|
disable_jump_forward: bool = False
|
||||||
disable_cuda_graph: bool = False
|
disable_cuda_graph: bool = False
|
||||||
disable_cuda_graph_padding: bool = False
|
disable_cuda_graph_padding: bool = False
|
||||||
|
enable_nccl_nvls: bool = False
|
||||||
disable_outlines_disk_cache: bool = False
|
disable_outlines_disk_cache: bool = False
|
||||||
disable_custom_all_reduce: bool = False
|
disable_custom_all_reduce: bool = False
|
||||||
disable_mla: bool = False
|
disable_mla: bool = False
|
||||||
@@ -783,6 +784,11 @@ class ServerArgs:
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="Disable cuda graph when padding is needed. Still uses cuda graph when padding is not needed.",
|
help="Disable cuda graph when padding is needed. Still uses cuda graph when padding is not needed.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--enable-nccl-nvls",
|
||||||
|
action="store_true",
|
||||||
|
help="Enable NCCL NVLS for prefill heavy requests when available.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--disable-outlines-disk-cache",
|
"--disable-outlines-disk-cache",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
|
|||||||
Reference in New Issue
Block a user