[Minor] Fix code style (#2311)

This commit is contained in:
Lianmin Zheng
2024-12-02 02:27:36 -08:00
committed by GitHub
parent c54bda300a
commit 18108abe5d
5 changed files with 292 additions and 317 deletions

View File

@@ -218,16 +218,6 @@ class ModelRunner:
)
self.tp_group = get_tp_group()
# Currently, there is a bug with multi-node tensor parallelism + padded cuda graph,
# so we disable padding in cuda graph.
if self.device == "cuda" and not all(
in_the_same_node_as(self.tp_group.cpu_group, source_rank=0)
):
self.server_args.disable_cuda_graph_padding = True
logger.info(
"Setting disable_cuda_graph_padding to True because of multi-node tensor parallelism."
)
# Check memory for tensor parallelism
if self.tp_size > 1:
local_gpu_memory = get_available_gpu_memory(self.device, self.gpu_id)