[Optimization][Perf] Disable the GC during CUDA graph capture to speed up by up to 3x (#8577)
This commit is contained in:
@@ -215,6 +215,7 @@ class ServerArgs:
     disable_cuda_graph: bool = False
     disable_cuda_graph_padding: bool = False
     enable_profile_cuda_graph: bool = False
+    enable_cudagraph_gc: bool = False
     enable_nccl_nvls: bool = False
     enable_tokenizer_batch_encode: bool = False
     disable_outlines_disk_cache: bool = False
@@ -1545,6 +1546,11 @@ class ServerArgs:
             action="store_true",
             help="Enable profiling of cuda graph capture.",
         )
+        parser.add_argument(
+            "--enable-cudagraph-gc",
+            action="store_true",
+            help="Enable garbage collection during CUDA graph capture. If disabled (default), GC is frozen during capture to speed up the process.",
+        )
         parser.add_argument(
             "--enable-nccl-nvls",
             action="store_true",
Reference in New Issue
Block a user