Use monotonic clock for interval measurement (#6211)

Signed-off-by: Lifu Huang <lifu.hlf@gmail.com>
This commit is contained in:
Lifu Huang
2025-05-17 16:49:18 -07:00
committed by GitHub
parent 2716830802
commit 3cf1473a09
21 changed files with 72 additions and 72 deletions

View File

@@ -373,10 +373,10 @@ def latency_test_run_once(
# Prefill
synchronize(device)
-tic = time.time()
+tic = time.perf_counter()
next_token_ids, _, batch = extend(reqs, model_runner)
synchronize(device)
-prefill_latency = time.time() - tic
+prefill_latency = time.perf_counter() - tic
tot_latency += prefill_latency
throughput = input_len * batch_size / prefill_latency
rank_print(
@@ -389,10 +389,10 @@ def latency_test_run_once(
decode_latencies = []
for i in range(output_len - 1):
synchronize(device)
-tic = time.time()
+tic = time.perf_counter()
next_token_ids, _ = decode(next_token_ids, batch, model_runner)
synchronize(device)
-latency = time.time() - tic
+latency = time.perf_counter() - tic
tot_latency += latency
throughput = batch_size / latency
decode_latencies.append(latency)