Improve multi-node stability (#1171)

This commit is contained in:
Lianmin Zheng
2024-08-20 22:35:05 -07:00
committed by GitHub
parent cd10654e7e
commit bea2bb9eea
11 changed files with 94 additions and 76 deletions

View File

@@ -465,6 +465,7 @@ class ModelRunner:
self,
max_batch_size_to_capture=max(batch_size_list),
use_torch_compile=self.server_args.enable_torch_compile,
disable_padding=self.server_args.disable_cuda_graph_padding,
)
try:
self.cuda_graph_runner.capture(batch_size_list)