Add back data parallelism (#1635)

This commit is contained in:
Lianmin Zheng
2024-10-11 07:22:48 -07:00
committed by GitHub
parent 5d09ca5735
commit 23cc66f7b6
7 changed files with 228 additions and 39 deletions

View File

@@ -141,7 +141,7 @@ class ModelRunner:
self.init_attention_backend()
def init_torch_distributed(self):
logger.info("Init torch distributed begin.")
logger.info("Init torch distributed begin.")
# Init torch distributed
if self.device == "cuda":
torch.cuda.set_device(self.gpu_id)