Improve torch compile for fused moe (#2327)

This commit is contained in:
Lianmin Zheng
2024-12-03 01:58:25 -08:00
committed by GitHub
parent 83b340e371
commit 07ec07ad1f
6 changed files with 45 additions and 24 deletions

View File

@@ -622,7 +622,7 @@ class ModelRunner:
tic = time.time()
logger.info("Capture cuda graph begin. This can take up to several minutes.")
self.cuda_graph_runner = CudaGraphRunner(self)
-logger.info(f"Capture cuda graph end. Time elapsed: {time.time() - tic:.2f}s")
+logger.info(f"Capture cuda graph end. Time elapsed: {time.time() - tic:.2f} s")
def apply_torch_tp(self):
logger.info(f"Enabling torch tensor parallelism on {self.tp_size} devices.")