[Optimization][Perf] Disable the GC during CUDA graph capture to speed up by up to 3x (#8577)

This commit is contained in:
Brayden Zhong
2025-07-31 14:31:21 -04:00
committed by GitHub
parent aee0ef52f5
commit 4acf690206
2 changed files with 31 additions and 1 deletions

View File

@@ -215,6 +215,7 @@ class ServerArgs:
disable_cuda_graph: bool = False
disable_cuda_graph_padding: bool = False
enable_profile_cuda_graph: bool = False
# Opt-in: keep Python garbage collection enabled during CUDA graph capture.
# Per the --enable-cudagraph-gc help text, the default (False) freezes GC
# during capture to speed the capture up.
enable_cudagraph_gc: bool = False
enable_nccl_nvls: bool = False
enable_tokenizer_batch_encode: bool = False
disable_outlines_disk_cache: bool = False
@@ -1545,6 +1546,11 @@ class ServerArgs:
action="store_true",
help="Enable profiling of cuda graph capture.",
)
# Opt-in switch: with action="store_true" the value is False unless the flag
# is passed, so the default behavior is the faster GC-frozen capture path.
# NOTE(review): presumably `parser` is an argparse parser, in which case the
# value lands on `enable_cudagraph_gc` (dashes become underscores) — confirm.
parser.add_argument(
"--enable-cudagraph-gc",
action="store_true",
help="Enable garbage collection during CUDA graph capture. If disabled (default), GC is frozen during capture to speed up the process.",
)
parser.add_argument(
"--enable-nccl-nvls",
action="store_true",