Replace time.time() with time.perf_counter() for benchmarking. (#6178)

Signed-off-by: Lifu Huang <lifu.hlf@gmail.com>
This commit is contained in:
Lifu Huang
2025-05-11 14:32:49 -07:00
committed by GitHub
parent e9a47f4cb5
commit 6e2da51561
61 changed files with 158 additions and 158 deletions

View File

@@ -164,7 +164,7 @@ def init_process_hf(
)
dist.barrier(group=group, device_ids=[rank])
torch.cuda.synchronize()
time_begin_broadcast = time.time()
time_begin_broadcast = time.perf_counter()
# The last parameter is lm_head.weight, which is tied
# with embed_tokens.weight. Actually, we only need
@@ -182,7 +182,7 @@ def init_process_hf(
group=group,
)
torch.cuda.synchronize()
time_end_broadcast = time.time()
time_end_broadcast = time.perf_counter()
# Measure the latency of broadcasting/weights update.
broadcast_time = time_end_broadcast - time_begin_broadcast
@@ -282,7 +282,7 @@ def init_process_sgl(
)
torch.cuda.synchronize()
time_begin_update = time.time()
time_begin_update = time.perf_counter()
# The last parameter is lm_head.weight, which is tied
# with embed_tokens.weight. Actually, we only need
@@ -312,7 +312,7 @@ def init_process_sgl(
},
)
torch.cuda.synchronize()
time_end_update = time.time()
time_end_update = time.perf_counter()
# Measure the latency of broadcast/weights update.
update_time = time_end_update - time_begin_update