Make NCCL NVLS configurable (#3502)
This commit is contained in:
@@ -140,6 +140,7 @@ class ServerArgs:
|
||||
disable_jump_forward: bool = False
|
||||
disable_cuda_graph: bool = False
|
||||
disable_cuda_graph_padding: bool = False
|
||||
enable_nccl_nvls: bool = False
|
||||
disable_outlines_disk_cache: bool = False
|
||||
disable_custom_all_reduce: bool = False
|
||||
disable_mla: bool = False
|
||||
@@ -783,6 +784,11 @@ class ServerArgs:
|
||||
action="store_true",
|
||||
help="Disable cuda graph when padding is needed. Still uses cuda graph when padding is not needed.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-nccl-nvls",
|
||||
action="store_true",
|
||||
help="Enable NCCL NVLS for prefill heavy requests when available.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--disable-outlines-disk-cache",
|
||||
action="store_true",
|
||||
|
||||
Reference in New Issue
Block a user