diff --git a/benchmark/bench_in_batch_prefix/bench_in_batch_prefix.py b/benchmark/bench_in_batch_prefix/bench_in_batch_prefix.py
index 86648e5ff..282097112 100644
--- a/benchmark/bench_in_batch_prefix/bench_in_batch_prefix.py
+++ b/benchmark/bench_in_batch_prefix/bench_in_batch_prefix.py
@@ -64,11 +64,11 @@ def test_batch_by_batch(all_prompts, gen_len):
 
     tot_time = 0
     for i in range(len(all_prompts)):
-        tic = time.time()
+        tic = time.perf_counter()
         text_qa.run_batch(
             list(zip(all_prompts[i], [gen_len] * len(all_prompts[i]))),
         )
-        tot_time += time.time() - tic
+        tot_time += time.perf_counter() - tic
 
     return tot_time
 
@@ -78,13 +78,13 @@ def test_batch_by_batch_with_hint(all_prompts, gen_len):
 
     tot_time = 0
     for i in range(len(all_prompts)):
-        tic = time.time()
+        tic = time.perf_counter()
         # Send a hint to cache the prefix
         text_qa.run_batch(list(zip(all_prompts[i][:1], [gen_len])))
         # Send the batch
         text_qa.run_batch(list(zip(all_prompts[i], [gen_len] * len(all_prompts[i]))))
 
-        tot_time += time.time() - tic
+        tot_time += time.perf_counter() - tic
 
     return tot_time
 
@@ -94,11 +94,11 @@ def test_send_all(all_prompts, gen_len):
 
     all_prompts = [x for prompt_list in all_prompts for x in prompt_list]
 
-    tic = time.time()
+    tic = time.perf_counter()
     text_qa.run_batch(
         list(zip(all_prompts, [gen_len] * len(all_prompts))),
     )
-    tot_time = time.time() - tic
+    tot_time = time.perf_counter() - tic
 
     return tot_time
 
diff --git a/benchmark/benchmark_batch/benchmark_batch.py b/benchmark/benchmark_batch/benchmark_batch.py
index 15ef0ab6a..a8592d48a 100644
--- a/benchmark/benchmark_batch/benchmark_batch.py
+++ b/benchmark/benchmark_batch/benchmark_batch.py
@@ -81,7 +81,7 @@ def send_batch_request(endpoint, prompts, gen_tokens, request_id):
     }
     data = {"text": prompts, "sampling_params": sampling_params}
 
-    start_time = time.time()
+    start_time = time.perf_counter()
     try:
         response = requests.post(
             endpoint.base_url + "/generate", json=data, timeout=3600
@@ -90,7 +90,7 @@ def send_batch_request(endpoint, prompts, gen_tokens, request_id):
             error = response.json()
             raise RuntimeError(f"Request {request_id} failed: {error}")
         result = response.json()
-        elapsed_time = (time.time() - start_time) * 1000  # Convert to ms
+        elapsed_time = (time.perf_counter() - start_time) * 1000  # Convert to ms
         avg_per_prompt = elapsed_time / len(prompts) if prompts else 0
         return request_id, elapsed_time, avg_per_prompt, True, len(prompts)
     except Exception as e:
@@ -104,7 +104,7 @@ def run_benchmark(endpoint, batched_prompts, batch_size, gen_tokens):
     num_requests = len(batched_prompts)
 
     # Record start time for total latency
-    benchmark_start_time = time.time()
+    benchmark_start_time = time.perf_counter()
 
     for i, batch_prompts in enumerate(batched_prompts):
         request_id = i + 1
@@ -119,7 +119,7 @@ def run_benchmark(endpoint, batched_prompts, batch_size, gen_tokens):
         results.append(result)
 
     # Calculate total latency
-    total_latency = (time.time() - benchmark_start_time) * 1000  # Convert to ms
+    total_latency = (time.perf_counter() - benchmark_start_time) * 1000  # Convert to ms
 
     return results, total_latency
 
diff --git a/benchmark/benchmark_batch/benchmark_tokenizer.py b/benchmark/benchmark_batch/benchmark_tokenizer.py
index c00bfb84b..88a5820b6 100644
--- a/benchmark/benchmark_batch/benchmark_tokenizer.py
+++ b/benchmark/benchmark_batch/benchmark_tokenizer.py
@@ -44,10 +44,10 @@ def benchmark_sequential_vs_batch(prompts, batch_size, tokenizer):
     for run in range(NUM_RUNS):
         batch_prompts = prompts[:batch_size]  # Use same prompts for fair comparison
 
-        start_time = time.time()
+        start_time = time.perf_counter()
         for prompt in batch_prompts:
             tokens = tokenizer.encode(prompt)
-        sequential_time = (time.time() - start_time) * 1000
+        sequential_time = (time.perf_counter() - start_time) * 1000
         sequential_times.append(sequential_time)
 
     # Batch tokenization using tokenizer()
@@ -55,9 +55,9 @@ def benchmark_sequential_vs_batch(prompts, batch_size, tokenizer):
     for run in range(NUM_RUNS):
         batch_prompts = prompts[:batch_size]  # Use same prompts for fair comparison
 
-        start_time = time.time()
+        start_time = time.perf_counter()
         tokens = tokenizer(batch_prompts)
-        batch_time = (time.time() - start_time) * 1000
+        batch_time = (time.perf_counter() - start_time) * 1000
         batch_times.append(batch_time)
 
     return {
diff --git a/benchmark/generative_agents/bench_other.py b/benchmark/generative_agents/bench_other.py
index 48f6ebc40..c0b3a3406 100644
--- a/benchmark/generative_agents/bench_other.py
+++ b/benchmark/generative_agents/bench_other.py
@@ -39,7 +39,7 @@ def main(args):
         answer = await call_generate(**arg, temperature=0)
         states.append(answer)
 
-    tic = time.time()
+    tic = time.perf_counter()
     # we always sequentially execute agent calls to maintain its dependency
     if args.backend != "lmql":
         for arg in tqdm(arguments):
@@ -50,7 +50,7 @@ def main(args):
         loop = asyncio.get_event_loop()
         for arg in tqdm(arguments):
             loop.run_until_complete(get_one_answer_async(arg))
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     print(f"Latency: {latency:.3f}")
 
diff --git a/benchmark/generative_agents/bench_sglang.py b/benchmark/generative_agents/bench_sglang.py
index b42a32b44..034b16591 100644
--- a/benchmark/generative_agents/bench_sglang.py
+++ b/benchmark/generative_agents/bench_sglang.py
@@ -35,14 +35,14 @@ def main(args):
 
     states = []
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     for a in arguments:
         # only a single key in the dict
         for func, arg in a.items():
             result = func.run(**arg)
         result.sync()
         states.append(result)
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
diff --git a/benchmark/gsm8k/bench_other.py b/benchmark/gsm8k/bench_other.py
index a8bbcfb5c..6dcb9ad7c 100644
--- a/benchmark/gsm8k/bench_other.py
+++ b/benchmark/gsm8k/bench_other.py
@@ -75,7 +75,7 @@ def main(args):
             )
             states[i] = answer
 
-        tic = time.time()
+        tic = time.perf_counter()
         if args.parallel == 1:
             for i in tqdm(range(len(questions))):
                 get_one_answer(i)
@@ -106,9 +106,9 @@ def main(args):
                 for j in range(len(rets)):
                     states[i + j] = rets[j]
 
-        tic = time.time()
+        tic = time.perf_counter()
         asyncio.run(batched_call(batch_size=args.parallel))
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     preds = []
     for i in range(len(states)):
diff --git a/benchmark/gsm8k/bench_sglang.py b/benchmark/gsm8k/bench_sglang.py
index b6bdbef09..05ac0beb1 100644
--- a/benchmark/gsm8k/bench_sglang.py
+++ b/benchmark/gsm8k/bench_sglang.py
@@ -84,14 +84,14 @@ def main(args):
     #####################################
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = few_shot_gsm8k.run_batch(
         arguments,
         temperature=0,
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     preds = []
     for i in range(len(states)):
diff --git a/benchmark/hellaswag/bench_other.py b/benchmark/hellaswag/bench_other.py
index 04be4569a..cde0794bb 100644
--- a/benchmark/hellaswag/bench_other.py
+++ b/benchmark/hellaswag/bench_other.py
@@ -57,7 +57,7 @@ def main(args):
                 context=few_shot_examples + questions[i], choices=choices[i]
             )
 
-        tic = time.time()
+        tic = time.perf_counter()
         if args.parallel == 1:
             for i in tqdm(range(len(questions))):
                 get_one_answer(i)
@@ -82,10 +82,10 @@ def main(args):
                 for j in range(len(rets)):
                     preds[i + j] = rets[j]
 
-        tic = time.time()
+        tic = time.perf_counter()
         asyncio.run(batched_call(batch_size=args.parallel))
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     acc = np.mean(np.array(preds) == np.array(labels))
diff --git a/benchmark/hellaswag/bench_sglang.py b/benchmark/hellaswag/bench_sglang.py
index 798521f97..6345a453b 100644
--- a/benchmark/hellaswag/bench_sglang.py
+++ b/benchmark/hellaswag/bench_sglang.py
@@ -68,7 +68,7 @@ def main(args):
     #####################################
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     rets = few_shot_hellaswag.run_batch(
         arguments,
         temperature=0,
@@ -76,7 +76,7 @@ def main(args):
         progress_bar=True,
     )
     preds = [choices[i].index(rets[i]["answer"]) for i in range(len(rets))]
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     acc = np.mean(np.array(preds) == np.array(labels))
diff --git a/benchmark/hicache/bench_multiturn.py b/benchmark/hicache/bench_multiturn.py
index 6bd0bd99e..a2a88b634 100644
--- a/benchmark/hicache/bench_multiturn.py
+++ b/benchmark/hicache/bench_multiturn.py
@@ -261,7 +261,7 @@ class WorkloadGenerator:
             client_id, payload = item
             response = await async_request_sglang_generate(payload, self.url, self.pbar)
             if self.pbar.n == self.pbar.total:
-                self.finished_time = time.time()
+                self.finished_time = time.perf_counter()
             self.response_queue.put((client_id, response))
         except Exception as e:
             print(f"Request failed: {e}")
@@ -334,7 +334,7 @@ class WorkloadGenerator:
         request_thread = threading.Thread(target=self.request_sender, daemon=True)
         response_thread = threading.Thread(target=self.response_handler, daemon=True)
 
-        self.start_time = time.time()
+        self.start_time = time.perf_counter()
         request_thread.start()
         response_thread.start()
 
diff --git a/benchmark/json_decode_regex/bench_other.py b/benchmark/json_decode_regex/bench_other.py
index d80ea1de7..87051ea82 100644
--- a/benchmark/json_decode_regex/bench_other.py
+++ b/benchmark/json_decode_regex/bench_other.py
@@ -53,7 +53,7 @@ def main(args):
     def get_one_answer(i):
         states[i] = json_decode(generate=call_generate, **arguments[i])
 
-    tic = time.time()
+    tic = time.perf_counter()
     if args.parallel == 1:
         for i in tqdm(range(len(arguments))):
             get_one_answer(i)
@@ -68,7 +68,7 @@ def main(args):
             for _ in rets:
                 pass
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
diff --git a/benchmark/json_decode_regex/bench_sglang.py b/benchmark/json_decode_regex/bench_sglang.py
index 4139ebf8a..9aab11e43 100644
--- a/benchmark/json_decode_regex/bench_sglang.py
+++ b/benchmark/json_decode_regex/bench_sglang.py
@@ -63,11 +63,11 @@ def main(args):
     json_warm_up.run().sync()
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = json_decode.run_batch(
         arguments, temperature=0, num_threads=args.parallel, progress_bar=True
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
diff --git a/benchmark/json_jump_forward/bench_other.py b/benchmark/json_jump_forward/bench_other.py
index 9eb5c58b3..a64e950d7 100644
--- a/benchmark/json_jump_forward/bench_other.py
+++ b/benchmark/json_jump_forward/bench_other.py
@@ -175,7 +175,7 @@ def bench_character(args):
     else:
         raise ValueError(f"Invalid backend: {args.backend}")
 
-    tic = time.time()
+    tic = time.perf_counter()
 
     if args.backend != "lmql":
         if args.parallel == 1:
@@ -202,7 +202,7 @@ def bench_character(args):
                 asyncio.gather(*[get_one_answer_async(i) for i in bt])
             )
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     return states, latency
 
@@ -236,7 +236,7 @@ def bench_city_doc(args):
     else:
         raise ValueError(f"Invalid backend: {args.backend}")
 
-    tic = time.time()
+    tic = time.perf_counter()
     if args.parallel == 1:
         for i in tqdm(range(len(arguments))):
             get_one_answer(i)
@@ -246,7 +246,7 @@ def bench_city_doc(args):
             for _ in rets:
                 pass
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     return states, latency
 
diff --git a/benchmark/json_jump_forward/bench_sglang.py b/benchmark/json_jump_forward/bench_sglang.py
index 10cf2699b..29f635f75 100644
--- a/benchmark/json_jump_forward/bench_sglang.py
+++ b/benchmark/json_jump_forward/bench_sglang.py
@@ -67,14 +67,14 @@ def bench_city_doc(args):
     sgl.set_default_backend(backend)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = city_gen.run_batch(
         arguments,
         temperature=0,
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     return states, latency
 
@@ -91,14 +91,14 @@ def bench_character(args):
     sgl.set_default_backend(backend)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = character_gen.run_batch(
         arguments,
         temperature=0,
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     return states, latency
 
diff --git a/benchmark/json_schema/bench_sglang.py b/benchmark/json_schema/bench_sglang.py
index 4693baae3..55365ff2e 100644
--- a/benchmark/json_schema/bench_sglang.py
+++ b/benchmark/json_schema/bench_sglang.py
@@ -85,14 +85,14 @@ def bench_schema(args):
     sgl.set_default_backend(backend)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = schema_gen.run_batch(
         arguments,
         temperature=0,
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Check if the outputs are valid
     indexes = []
diff --git a/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py b/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py
index a3ead1eca..be349e456 100644
--- a/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py
+++ b/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py
@@ -487,7 +487,7 @@ def main(args: argparse.Namespace):
             ]
         print(f"Start tuning over {len(search_space)} configurations...")
 
-        start = time.time()
+        start = time.perf_counter()
         configs = _distribute(
             "tune",
             [
@@ -522,7 +522,7 @@ def main(args: argparse.Namespace):
             use_int8_w8a16,
             block_shape,
         )
-        end = time.time()
+        end = time.perf_counter()
         print(f"Tuning took {end - start:.2f} seconds")
     else:
         outputs = _distribute(
diff --git a/benchmark/kernels/quantization/tuning_block_wise_kernel.py b/benchmark/kernels/quantization/tuning_block_wise_kernel.py
index 7b0dfb47a..1b51e54b7 100644
--- a/benchmark/kernels/quantization/tuning_block_wise_kernel.py
+++ b/benchmark/kernels/quantization/tuning_block_wise_kernel.py
@@ -359,7 +359,7 @@ def tune_on_gpu(args_dict):
         config for config in search_space if block_k % config["BLOCK_SIZE_K"] == 0
     ]
 
-    start = time.time()
+    start = time.perf_counter()
     results = {}
     for shape in tqdm(weight_shapes, desc=f"GPU {gpu_id} - Shapes"):
         N, K = shape[0], shape[1]
@@ -379,7 +379,7 @@ def tune_on_gpu(args_dict):
         best_configs = {M: config for M, config in zip(batch_sizes, benchmark_results)}
         save_configs(N, K, block_n, block_k, best_configs, save_path, input_type)
 
-    end = time.time()
+    end = time.perf_counter()
     print(f"Tuning on GPU {gpu_id} took {end - start:.2f} seconds")
 
 
diff --git a/benchmark/line_retrieval/bench_sglang.py b/benchmark/line_retrieval/bench_sglang.py
index 922d5009d..e974e7dd3 100644
--- a/benchmark/line_retrieval/bench_sglang.py
+++ b/benchmark/line_retrieval/bench_sglang.py
@@ -70,7 +70,7 @@ def eval_model(args, line_obj, num_hoops, src_indices, dst_percents):
     # Select backend
     backend = select_sglang_backend(args)
 
-    tic = time.time()
+    tic = time.perf_counter()
     states = line_retrieval.run_batch(
         arguments,
         temperature=0,
@@ -78,7 +78,7 @@ def eval_model(args, line_obj, num_hoops, src_indices, dst_percents):
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     corrects = []
     for i in range(len(arguments)):
diff --git a/benchmark/llava_bench/bench_sglang.py b/benchmark/llava_bench/bench_sglang.py
index f84c8a90f..b9e8c1405 100644
--- a/benchmark/llava_bench/bench_sglang.py
+++ b/benchmark/llava_bench/bench_sglang.py
@@ -41,7 +41,7 @@ def main(args):
     sgl.set_default_backend(backend)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     if args.parallel == 1:
         for i in tqdm.tqdm(range(len(lines))):
             image_file = arguments[i]["image_file"]
@@ -52,7 +52,7 @@ def main(args):
         states = image_qa.run_batch(
             arguments, temperature=0, num_threads=args.parallel, progress_bar=True
         )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     print(f"Latency: {latency:.3f}")
 
diff --git a/benchmark/llm_judge/bench_other.py b/benchmark/llm_judge/bench_other.py
index 2231bcdbb..8e6029067 100644
--- a/benchmark/llm_judge/bench_other.py
+++ b/benchmark/llm_judge/bench_other.py
@@ -85,7 +85,7 @@ def main(args):
     call_generate = partial(get_call_generate(args), temperature=0)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
 
     if args.backend != "lmql":
 
@@ -120,7 +120,7 @@ def main(args):
                 asyncio.gather(*[get_one_answer_async(i) for i in bt])
             )
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
diff --git a/benchmark/llm_judge/bench_sglang.py b/benchmark/llm_judge/bench_sglang.py
index 38c95974e..97e6c3979 100644
--- a/benchmark/llm_judge/bench_sglang.py
+++ b/benchmark/llm_judge/bench_sglang.py
@@ -59,7 +59,7 @@ def main(args):
     backend = select_sglang_backend(args)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = multi_dimension_judge.run_batch(
         arguments,
         temperature=0,
@@ -67,7 +67,7 @@ def main(args):
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     print(f"Latency: {latency:.3f}")
 
diff --git a/benchmark/long_json_decode/bench_other.py b/benchmark/long_json_decode/bench_other.py
index a83c797c4..0ad38a014 100644
--- a/benchmark/long_json_decode/bench_other.py
+++ b/benchmark/long_json_decode/bench_other.py
@@ -45,7 +45,7 @@ def main(args):
     def get_one_answer(i):
         states[i] = json_decode(generate=call_generate, **arguments[i])
 
-    tic = time.time()
+    tic = time.perf_counter()
     if args.parallel == 1:
         for i in tqdm(range(len(arguments))):
             get_one_answer(i)
@@ -58,7 +58,7 @@ def main(args):
                 )
             )
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
diff --git a/benchmark/long_json_decode/bench_sglang.py b/benchmark/long_json_decode/bench_sglang.py
index 6e19a732f..8394cfc2e 100644
--- a/benchmark/long_json_decode/bench_sglang.py
+++ b/benchmark/long_json_decode/bench_sglang.py
@@ -46,11 +46,11 @@ def main(args):
     sgl.set_default_backend(backend)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = json_decode.run_batch(
         arguments, temperature=0, num_threads=args.parallel, progress_bar=True
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
diff --git a/benchmark/mmlu/bench_other.py b/benchmark/mmlu/bench_other.py
index c5d48dac6..f1b166c2b 100644
--- a/benchmark/mmlu/bench_other.py
+++ b/benchmark/mmlu/bench_other.py
@@ -76,7 +76,7 @@ def evaluate(args, subject, dev_df, test_df, call_generate):
             pred = call_generate(prompts[i], temperature=0, max_tokens=max_tokens)
             preds[i] = pred.strip()[0]
 
-        tic = time.time()
+        tic = time.perf_counter()
         if args.parallel == 1:
             for i in range(len(prompts)):
                 get_one_answer(i)
@@ -94,9 +94,9 @@ def evaluate(args, subject, dev_df, test_df, call_generate):
                 for j in range(len(rets)):
                     preds[i + j] = rets[j].strip()[0]
 
-        tic = time.time()
+        tic = time.perf_counter()
         asyncio.run(batched_call(batch_size=args.parallel))
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     cors = [pred == label for pred, label in zip(preds, labels)]
diff --git a/benchmark/mmlu/bench_sglang.py b/benchmark/mmlu/bench_sglang.py
index 210b6111e..0bae7b6e4 100644
--- a/benchmark/mmlu/bench_sglang.py
+++ b/benchmark/mmlu/bench_sglang.py
@@ -116,7 +116,7 @@ def main(args):
     backend = select_sglang_backend(args)
 
     # Run
-    tic = time.time()
+    tic = time.perf_counter()
     states = few_shot_mmlu.run_batch(
         arguments,
         temperature=0,
@@ -128,7 +128,7 @@ def main(args):
     preds = [
         s["answer"].strip()[0] if len(s["answer"].strip()) > 0 else "" for s in states
     ]
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     cors = [pred == label for pred, label in zip(preds, labels)]
diff --git a/benchmark/mmmu/bench_sglang.py b/benchmark/mmmu/bench_sglang.py
index 58a4039ef..a177fd137 100644
--- a/benchmark/mmmu/bench_sglang.py
+++ b/benchmark/mmmu/bench_sglang.py
@@ -119,7 +119,7 @@ async def eval_mmmu(args) -> None:
         api_key="sk", base_url=f"http://127.0.0.1:{args.port}/v1"
     )
     semaphore = asyncio.Semaphore(args.concurrency)
-    start = time.time()
+    start = time.perf_counter()
     base_url = f"http://127.0.0.1:{args.port}"
 
     if args.profile:
@@ -147,7 +147,7 @@ async def eval_mmmu(args) -> None:
         if profile_output.success:
             print("Profiler stopped")
 
-    print(f"Benchmark time: {time.time() - start}")
+    print(f"Benchmark time: {time.perf_counter() - start}")
     args.output_path = f"./val_sglang.json"
     save_json(args.output_path, out_samples)
     eval_result(model_answer_path=args.output_path, answer_dict=answer_dict)
diff --git a/benchmark/mtbench/bench_other.py b/benchmark/mtbench/bench_other.py
index 2c321e8a1..5e579e9a6 100644
--- a/benchmark/mtbench/bench_other.py
+++ b/benchmark/mtbench/bench_other.py
@@ -66,7 +66,7 @@ def main(args):
         answers[i] = cur_answers
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     if args.parallel == 1:
         for i in tqdm(range(len(questions))):
             get_answer(i)
@@ -79,7 +79,7 @@ def main(args):
                 )
             )
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     print(f"#questions: {len(questions)}, Latency: {latency:.2f}")
 
diff --git a/benchmark/mtbench/bench_sglang.py b/benchmark/mtbench/bench_sglang.py
index b57d1647d..0d0545b3a 100644
--- a/benchmark/mtbench/bench_sglang.py
+++ b/benchmark/mtbench/bench_sglang.py
@@ -57,7 +57,7 @@ def main(args):
     sgl.set_default_backend(backend)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     rets = answer_mt_bench.run_batch(
         arguments,
         temperature=0,
@@ -66,7 +66,7 @@ def main(args):
         progress_bar=True,
     )
     answers = [[s["answer_1"], s["answer_2"]] for s in rets]
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     print(f"#questions: {len(questions)}, Latency: {latency:.2f}")
 
diff --git a/benchmark/mtbench/bench_sglang_eagle.py b/benchmark/mtbench/bench_sglang_eagle.py
index e1207afe1..3eb6036c7 100644
--- a/benchmark/mtbench/bench_sglang_eagle.py
+++ b/benchmark/mtbench/bench_sglang_eagle.py
@@ -68,7 +68,7 @@ def main(args):
     sgl.set_default_backend(backend)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     rets = answer_mt_bench.run_batch(
         arguments,
         temperature=0,
@@ -78,7 +78,7 @@ def main(args):
     )
     answers = [[s["answer_1"], s["answer_2"]] for s in rets]
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
     num_output_tokens = sum(
         s.get_meta_info("answer_1")["completion_tokens"]
         + s.get_meta_info("answer_2")["completion_tokens"]
diff --git a/benchmark/multi_chain_reasoning/bench_other.py b/benchmark/multi_chain_reasoning/bench_other.py
index e0ff2be45..f361496ad 100644
--- a/benchmark/multi_chain_reasoning/bench_other.py
+++ b/benchmark/multi_chain_reasoning/bench_other.py
@@ -113,7 +113,7 @@ def main(args):
             answer = multi_chain_gsm8k(questions[i], args.num_chains, call_generate)
             states[i] = answer
 
-        tic = time.time()
+        tic = time.perf_counter()
         if args.parallel == 1:
             for i in tqdm(range(len(questions))):
                 get_one_answer(i)
@@ -134,7 +134,7 @@ def main(args):
             )
             states[i] = answer
 
-        tic = time.time()
+        tic = time.perf_counter()
         loop = asyncio.get_event_loop()
         batches = [
             list(range(i, min(i + args.parallel, len(questions))))
@@ -144,7 +144,7 @@ def main(args):
             tasks = [get_one_answer_asyncio(k) for k in bt]
             loop.run_until_complete(asyncio.gather(*tasks))
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     preds = []
     for i in range(len(states)):
diff --git a/benchmark/multi_chain_reasoning/bench_sglang.py b/benchmark/multi_chain_reasoning/bench_sglang.py
index 98a6b511e..1d3129db2 100644
--- a/benchmark/multi_chain_reasoning/bench_sglang.py
+++ b/benchmark/multi_chain_reasoning/bench_sglang.py
@@ -90,7 +90,7 @@ def main(args):
     backend = select_sglang_backend(args)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = multi_chain_gsm8k.run_batch(
         arguments,
         temperature=0,
@@ -98,7 +98,7 @@ def main(args):
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     preds = []
     for i in range(len(states)):
diff --git a/benchmark/multi_document_qa/bench_other.py b/benchmark/multi_document_qa/bench_other.py
index 6f0addcb7..627837c5c 100644
--- a/benchmark/multi_document_qa/bench_other.py
+++ b/benchmark/multi_document_qa/bench_other.py
@@ -61,7 +61,7 @@ def main(args):
     def get_one_answer(i):
         states[i] = multi_document_qa(generate=call_generate, **arguments[i])
 
-    tic = time.time()
+    tic = time.perf_counter()
     if args.parallel == 1:
         for i in tqdm(range(len(labels))):
             get_one_answer(i)
@@ -74,7 +74,7 @@ def main(args):
                 )
             )
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(states)
diff --git a/benchmark/multi_document_qa/bench_sglang.py b/benchmark/multi_document_qa/bench_sglang.py
index 645520166..0b4b0dbc6 100644
--- a/benchmark/multi_document_qa/bench_sglang.py
+++ b/benchmark/multi_document_qa/bench_sglang.py
@@ -49,11 +49,11 @@ def main(args):
     sgl.set_default_backend(backend)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = multi_document_qa.run_batch(
         arguments, temperature=0, num_threads=args.parallel, progress_bar=True
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print([s["answer"] for s in states])
diff --git a/benchmark/multi_turn_chat/bench_other.py b/benchmark/multi_turn_chat/bench_other.py
index 81d67ab7b..9189af5be 100644
--- a/benchmark/multi_turn_chat/bench_other.py
+++ b/benchmark/multi_turn_chat/bench_other.py
@@ -35,7 +35,7 @@ def main(args):
     def get_one_answer(i):
         states[i] = multi_turns(generate=call_generate, **multi_qas[i])
 
-    tic = time.time()
+    tic = time.perf_counter()
     if args.parallel == 1:
         for i in tqdm(range(len(multi_qas))):
             get_one_answer(i)
@@ -50,7 +50,7 @@ def main(args):
             for _ in rets:
                 pass
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
diff --git a/benchmark/multi_turn_chat/bench_sglang.py b/benchmark/multi_turn_chat/bench_sglang.py
index 7feaced73..1051bf19e 100644
--- a/benchmark/multi_turn_chat/bench_sglang.py
+++ b/benchmark/multi_turn_chat/bench_sglang.py
@@ -27,7 +27,7 @@ def main(args):
 
     backend = select_sglang_backend(args)
 
-    tic = time.time()
+    tic = time.perf_counter()
     states = multi_turns.run_batch(
         multi_qas,
         temperature=0,
@@ -35,7 +35,7 @@ def main(args):
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     print(f"Latency: {latency:.3f}")
 
diff --git a/benchmark/multi_turn_chat/long_prompt_multi_turn.py b/benchmark/multi_turn_chat/long_prompt_multi_turn.py
index 20f6dd5e3..bda5bb9cc 100644
--- a/benchmark/multi_turn_chat/long_prompt_multi_turn.py
+++ b/benchmark/multi_turn_chat/long_prompt_multi_turn.py
@@ -84,7 +84,7 @@ def main(args):
 
     backend = select_sglang_backend(args)
 
-    tic = time.time()
+    tic = time.perf_counter()
     states = multi_turns.run_batch(
         multi_qas,
         temperature=0,
@@ -92,7 +92,7 @@ def main(args):
         num_threads="auto",
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     print(f"Latency: {latency:.3f}")
 
diff --git a/benchmark/react/bench_other.py b/benchmark/react/bench_other.py
index 91c5546f1..08666662b 100644
--- a/benchmark/react/bench_other.py
+++ b/benchmark/react/bench_other.py
@@ -146,7 +146,7 @@ def main(args):
 
             states.append(answer)
 
-    tic = time.time()
+    tic = time.perf_counter()
 
     if args.backend != "lmql":
         if args.parallel == 1:
@@ -173,7 +173,7 @@ def main(args):
             tasks = [run_single_agent_async(arg) for arg in bt]
             loop.run_until_complete(asyncio.gather(*tasks))
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     print(f"Latency: {latency:.3f}")
 
diff --git a/benchmark/react/bench_sglang.py b/benchmark/react/bench_sglang.py
index b07105e2c..331638e9f 100644
--- a/benchmark/react/bench_sglang.py
+++ b/benchmark/react/bench_sglang.py
@@ -115,14 +115,14 @@ def main(args):
     sgl.set_default_backend(backend)
 
     states = []
-    tic = time.time()
+    tic = time.perf_counter()
     states = webthink.run_batch(
         arguments,
         temperature=0,
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
diff --git a/benchmark/reasoning_benchmark/bench_sglang.py b/benchmark/reasoning_benchmark/bench_sglang.py
index c83204960..ccbff9d17 100644
--- a/benchmark/reasoning_benchmark/bench_sglang.py
+++ b/benchmark/reasoning_benchmark/bench_sglang.py
@@ -51,7 +51,7 @@ def main(args):
     )
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = reasoning_gen.run_batch(
         questions,
         num_threads=args.parallel,
@@ -60,7 +60,7 @@ def main(args):
         max_new_tokens=32768,
         top_p=0.95,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Extract results and record outcomes in a list.
     outcomes = []
diff --git a/benchmark/tip_suggestion/bench_other.py b/benchmark/tip_suggestion/bench_other.py
index fcc4fd624..2630081bd 100644
--- a/benchmark/tip_suggestion/bench_other.py
+++ b/benchmark/tip_suggestion/bench_other.py
@@ -68,7 +68,7 @@ def main(args):
     call_generate = partial(get_call_generate(args), temperature=0)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     if args.backend != "lmql":
 
         def get_one_answer(i):
@@ -102,7 +102,7 @@ def main(args):
             loop.run_until_complete(
                 asyncio.gather(*[get_one_answer_async(i) for i in batch])
             )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
diff --git a/benchmark/tip_suggestion/bench_sglang.py b/benchmark/tip_suggestion/bench_sglang.py
index 6d17821bc..86c476f97 100644
--- a/benchmark/tip_suggestion/bench_sglang.py
+++ b/benchmark/tip_suggestion/bench_sglang.py
@@ -65,11 +65,11 @@ def main(args):
     sgl.set_default_backend(select_sglang_backend(args))
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = suggest_tips.run_batch(
         arguments, temperature=0, num_threads=args.parallel, progress_bar=True
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
diff --git a/benchmark/tree_of_thought_deep/bench_other.py b/benchmark/tree_of_thought_deep/bench_other.py
index 21c7df351..0ef8c6360 100644
--- a/benchmark/tree_of_thought_deep/bench_other.py
+++ b/benchmark/tree_of_thought_deep/bench_other.py
@@ -138,7 +138,7 @@ def main(args):
     # Run requests
     states = [None] * len(questions)
 
-    tic = time.time()
+    tic = time.perf_counter()
     if args.backend != "lmql":
 
         def get_one_answer(i):
@@ -177,7 +177,7 @@ def main(args):
             tasks = [get_one_answer_async(k) for k in bt]
             loop.run_until_complete(asyncio.gather(*tasks))
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     answers_text = []
     for s in states:
diff --git a/benchmark/tree_of_thought_deep/bench_sglang.py b/benchmark/tree_of_thought_deep/bench_sglang.py
index bfb2a4113..bcdb6e54d 100644
--- a/benchmark/tree_of_thought_deep/bench_sglang.py
+++ b/benchmark/tree_of_thought_deep/bench_sglang.py
@@ -119,7 +119,7 @@ def main(args):
     backend = select_sglang_backend(args)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = tree_search.run_batch(
         arguments,
         temperature=0,
@@ -127,7 +127,7 @@ def main(args):
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
     answers_text = []
     for s in states:
         answers_text.append([x for xs in s.ret_value for x in xs])
diff --git a/benchmark/tree_of_thought_v0/bench_other.py b/benchmark/tree_of_thought_v0/bench_other.py
index 86e133577..703ecd7f4 100644
--- a/benchmark/tree_of_thought_v0/bench_other.py
+++ b/benchmark/tree_of_thought_v0/bench_other.py
@@ -121,7 +121,7 @@ def main(args):
     def get_one_answer(i):
         states[i] = tree_search(**arguments[i], call_generate=call_generate)
 
-    tic = time.time()
+    tic = time.perf_counter()
     if args.parallel == 1:
         for i in tqdm(range(len(questions))):
             get_one_answer(i)
@@ -134,7 +134,7 @@ def main(args):
                 )
             )
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     answers_text = []
     for s in states:
diff --git a/benchmark/tree_of_thought_v0/bench_sglang.py b/benchmark/tree_of_thought_v0/bench_sglang.py
index f0d130778..6d7575f36 100644
--- a/benchmark/tree_of_thought_v0/bench_sglang.py
+++ b/benchmark/tree_of_thought_v0/bench_sglang.py
@@ -107,7 +107,7 @@ def main(args):
     backend = select_sglang_backend(args)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = tree_search.run_batch(
         arguments,
         temperature=0,
@@ -115,7 +115,7 @@ def main(args):
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
     answers_text = []
     for s in states:
         answers_text.append([x for xs in s["answer"] for x in xs])
diff --git a/python/sglang/test/few_shot_gsm8k.py b/python/sglang/test/few_shot_gsm8k.py
index 4f655eb60..5aac87bd2 100644
--- a/python/sglang/test/few_shot_gsm8k.py
+++ b/python/sglang/test/few_shot_gsm8k.py
@@ -90,7 +90,7 @@ def run_eval(args):
     #####################################
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = few_shot_gsm8k.run_batch(
         arguments,
         temperature=args.temperature if hasattr(args, "temperature") else 0,
@@ -99,7 +99,7 @@ def run_eval(args):
         return_logprob=getattr(args, "return_logprob", None),
         logprob_start_len=getattr(args, "logprob_start_len", None),
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     preds = []
     for i in range(len(states)):
diff --git a/python/sglang/test/few_shot_gsm8k_engine.py b/python/sglang/test/few_shot_gsm8k_engine.py
index 67844e2f1..2453a91e4 100644
--- a/python/sglang/test/few_shot_gsm8k_engine.py
+++ b/python/sglang/test/few_shot_gsm8k_engine.py
@@ -89,7 +89,7 @@ def run_eval(args):
     }
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
 
     loop = asyncio.get_event_loop()
 
@@ -98,7 +98,7 @@ def run_eval(args):
     )
 
     # End requests
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Shutdown the engine
     engine.shutdown()
diff --git a/python/sglang/test/run_eval.py b/python/sglang/test/run_eval.py
index fe88171ce..51743be09 100644
--- a/python/sglang/test/run_eval.py
+++ b/python/sglang/test/run_eval.py
@@ -71,9 +71,9 @@ def run_eval(args):
     )
 
     # Run eval
-    tic = time.time()
+    tic = time.perf_counter()
     result = eval_obj(sampler)
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Dump reports
     metrics = result.metrics | {"score": result.score}
diff --git a/python/sglang/test/test_programs.py b/python/sglang/test/test_programs.py
index 262637eed..6756f2dd7 100644
--- a/python/sglang/test/test_programs.py
+++ b/python/sglang/test/test_programs.py
@@ -503,7 +503,7 @@ def test_hellaswag_select():
     #####################################
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     rets = few_shot_hellaswag.run_batch(
         arguments,
         temperature=0,
@@ -514,13 +514,13 @@ def test_hellaswag_select():
     preds = []
     for i, ret in enumerate(rets):
         preds.append(choices[i].index(ret["answer"]))
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     accuracy = np.mean(np.array(preds) == np.array(labels))
 
     # Test generator style of run_batch
-    tic = time.time()
+    tic = time.perf_counter()
     rets = few_shot_hellaswag.run_batch(
         arguments,
         temperature=0,
@@ -531,7 +531,7 @@ def test_hellaswag_select():
     preds_gen = []
     for i, ret in enumerate(rets):
         preds_gen.append(choices[i].index(ret["answer"]))
-    latency_gen = time.time() - tic
+    latency_gen = time.perf_counter() - tic
 
     # Compute accuracy
     accuracy_gen = np.mean(np.array(preds_gen) == np.array(labels))
diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py
index 1e78d6dc1..150f385c9 100644
--- a/python/sglang/test/test_utils.py
+++ b/python/sglang/test/test_utils.py
@@ -449,9 +449,9 @@ def popen_launch_server(
     else:
         process = subprocess.Popen(command, stdout=None, stderr=None, env=env)
 
-    start_time = time.time()
+    start_time = time.perf_counter()
     with requests.Session() as session:
-        while time.time() - start_time < timeout:
+        while time.perf_counter() - start_time < timeout:
             try:
                 headers = {
                     "Content-Type": "application/json; charset=utf-8",
@@ -584,7 +584,7 @@ class TestFile:
 
 
 def run_unittest_files(files: List[TestFile], timeout_per_file: float):
-    tic = time.time()
+    tic = time.perf_counter()
     success = True
 
     for i, file in enumerate(files):
@@ -599,13 +599,13 @@ def run_unittest_files(files: List[TestFile], timeout_per_file: float):
                 f".\n.\nBegin ({i}/{len(files) - 1}):\npython3 {filename}\n.\n.\n",
                 flush=True,
             )
-            tic = time.time()
+            tic = time.perf_counter()
 
             process = subprocess.Popen(
                 ["python3", filename], stdout=None, stderr=None, env=os.environ
             )
             process.wait()
-            elapsed = time.time() - tic
+            elapsed = time.perf_counter() - tic
 
             print(
                 f".\n.\nEnd ({i}/{len(files) - 1}):\n{filename=}, {elapsed=:.0f}, {estimated_time=}\n.\n.\n",
@@ -631,9 +631,9 @@ def run_unittest_files(files: List[TestFile], timeout_per_file: float):
             break
 
     if success:
-        print(f"Success. Time elapsed: {time.time() - tic:.2f}s", flush=True)
+        print(f"Success. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)
     else:
-        print(f"Fail. Time elapsed: {time.time() - tic:.2f}s", flush=True)
+        print(f"Fail. Time elapsed: {time.perf_counter() - tic:.2f}s", flush=True)
 
     return 0 if success else -1
 
diff --git a/sgl-router/py_test/test_launch_server.py b/sgl-router/py_test/test_launch_server.py
index 33dd3e854..afffe334f 100644
--- a/sgl-router/py_test/test_launch_server.py
+++ b/sgl-router/py_test/test_launch_server.py
@@ -92,9 +92,9 @@ def popen_launch_router(
 
     process = subprocess.Popen(command, stdout=None, stderr=None)
 
-    start_time = time.time()
+    start_time = time.perf_counter()
     with requests.Session() as session:
-        while time.time() - start_time < timeout:
+        while time.perf_counter() - start_time < timeout:
             try:
                 response = session.get(f"{base_url}/health")
                 if response.status_code == 200:
@@ -155,11 +155,11 @@ def terminate_and_wait(process, timeout=300):
         return
 
     process.terminate()
-    start_time = time.time()
+    start_time = time.perf_counter()
 
     while process.poll() is None:
         print(f"Terminating process {process.pid}")
-        if time.time() - start_time > timeout:
+        if time.perf_counter() - start_time > timeout:
             raise TimeoutError(
                 f"Process {process.pid} failed to terminate within {timeout}s"
             )
diff --git a/test/srt/experiment_runner.py b/test/srt/experiment_runner.py
index 7feeef1aa..f32f61d3b 100644
--- a/test/srt/experiment_runner.py
+++ b/test/srt/experiment_runner.py
@@ -184,9 +184,9 @@ class ExperimentRunner:
         self.logger = logging.getLogger(__name__)
 
     def wait_for_server(self, port: int, timeout: int = 300) -> bool:
-        start_time = time.time()
+        start_time = time.perf_counter()
 
-        while time.time() - start_time < timeout:
+        while time.perf_counter() - start_time < timeout:
             try:
                 response = requests.get(f"http://localhost:{port}/health")
                 if response.status_code == 200:
@@ -197,7 +197,7 @@ class ExperimentRunner:
         return False
 
     def run_task(self, config: TaskConfig) -> TaskResult:
-        start_time = time.time()
+        start_time = time.perf_counter()
         client_output = []
 
         try:
@@ -247,7 +247,7 @@ class ExperimentRunner:
                 name=config.name,
                 success=True,
                 output=formatted_output,
-                runtime=time.time() - start_time,
+                runtime=time.perf_counter() - start_time,
                 timestamp=datetime.now().isoformat(),
             )
 
@@ -256,7 +256,7 @@ class ExperimentRunner:
                 name=config.name,
                 success=False,
                 output=str(e),
-                runtime=time.time() - start_time,
+                runtime=time.perf_counter() - start_time,
                 timestamp=datetime.now().isoformat(),
             )
 
diff --git a/test/srt/models/test_encoder_embedding_models.py b/test/srt/models/test_encoder_embedding_models.py
index 5202917c4..bea5d4aff 100644
--- a/test/srt/models/test_encoder_embedding_models.py
+++ b/test/srt/models/test_encoder_embedding_models.py
@@ -79,9 +79,9 @@ class TestEncoderEmbeddingModels(CustomTestCase):
             # warm up
             hf_outputs = hf_runner.forward(truncated_prompts)
 
-            st_start_time = time.time()
+            st_start_time = time.perf_counter()
             hf_outputs = hf_runner.forward(truncated_prompts)
-            st_end_time = time.time()
+            st_end_time = time.perf_counter()
 
         with SRTRunner(
             model_path,
@@ -95,9 +95,9 @@ class TestEncoderEmbeddingModels(CustomTestCase):
             # warm up
             srt_outputs = srt_runner.forward(truncated_prompts)
 
-            sgl_start_time = time.time()
+            sgl_start_time = time.perf_counter()
             srt_outputs = srt_runner.forward(truncated_prompts)
-            sgl_end_time = time.time()
+            sgl_end_time = time.perf_counter()
 
         transformer_time = st_end_time - st_start_time
         sgl_time = sgl_end_time - sgl_start_time
diff --git a/test/srt/test_gptqmodel_dynamic.py b/test/srt/test_gptqmodel_dynamic.py
index 27ccd9a4b..284465b8b 100644
--- a/test/srt/test_gptqmodel_dynamic.py
+++ b/test/srt/test_gptqmodel_dynamic.py
@@ -130,9 +130,9 @@ class TestGPTQModelDynamic(CustomTestCase):
     def test_throughput(self):
         max_tokens = 256
 
-        tic = time.time()
+        tic = time.perf_counter()
         result = self.run_decode(max_tokens)
-        tok = time.time()
+        tok = time.perf_counter()
 
         print(f"result = `{result}`")
 
@@ -185,9 +185,9 @@ class TestGPTQModelDynamicWithMarlin(CustomTestCase):
     def test_throughput(self):
         max_tokens = 256
 
-        tic = time.time()
+        tic = time.perf_counter()
         result = self.run_decode(max_tokens)
-        tok = time.time()
+        tok = time.perf_counter()
 
         print(f"result = `{result}`")
 
diff --git a/test/srt/test_release_memory_occupation.py b/test/srt/test_release_memory_occupation.py
index 7ccd9f1f7..7a7659280 100644
--- a/test/srt/test_release_memory_occupation.py
+++ b/test/srt/test_release_memory_occupation.py
@@ -42,10 +42,10 @@ class TestReleaseMemoryOccupation(CustomTestCase):
         )
 
         print("release_memory_occupation start")
-        t = time.time()
+        t = time.perf_counter()
         engine.release_memory_occupation()
         if _DEBUG_EXTRA:
-            print("release_memory_occupation", time.time() - t)
+            print("release_memory_occupation", time.perf_counter() - t)
 
         if _DEBUG_EXTRA:
             time.sleep(5)
@@ -60,10 +60,10 @@ class TestReleaseMemoryOccupation(CustomTestCase):
             time.sleep(5)
 
         print("resume_memory_occupation start")
-        t = time.time()
+        t = time.perf_counter()
         engine.resume_memory_occupation()
         if _DEBUG_EXTRA:
-            print("resume_memory_occupation", time.time() - t)
+            print("resume_memory_occupation", time.perf_counter() - t)
 
         self.assertEqual(
             _try_allocate_big_tensor(),
diff --git a/test/srt/test_torch_compile.py b/test/srt/test_torch_compile.py
index 760cec84b..904e49f9d 100644
--- a/test/srt/test_torch_compile.py
+++ b/test/srt/test_torch_compile.py
@@ -62,9 +62,9 @@ class TestTorchCompile(CustomTestCase):
         res = self.run_decode(16)
 
         max_tokens = 256
-        tic = time.time()
+        tic = time.perf_counter()
         res = self.run_decode(max_tokens)
-        tok = time.time()
+        tok = time.perf_counter()
         print(f"{res=}")
         throughput = max_tokens / (tok - tic)
         print(f"Throughput: {throughput} tokens/s")
diff --git a/test/srt/test_torch_compile_moe.py b/test/srt/test_torch_compile_moe.py
index 42415b155..63423af43 100644
--- a/test/srt/test_torch_compile_moe.py
+++ b/test/srt/test_torch_compile_moe.py
@@ -62,9 +62,9 @@ class TestTorchCompileMoe(CustomTestCase):
         res = self.run_decode(16)
 
         max_tokens = 256
-        tic = time.time()
+        tic = time.perf_counter()
         res = self.run_decode(max_tokens)
-        tok = time.time()
+        tok = time.perf_counter()
         print(f"{res=}")
         throughput = max_tokens / (tok - tic)
         self.assertGreaterEqual(throughput, 285)
diff --git a/test/srt/test_torchao.py b/test/srt/test_torchao.py
index 77ec0a570..13c7b60b5 100644
--- a/test/srt/test_torchao.py
+++ b/test/srt/test_torchao.py
@@ -61,9 +61,9 @@ class TestTorchAO(CustomTestCase):
 
         max_tokens = 256
 
-        tic = time.time()
+        tic = time.perf_counter()
         res = self.run_decode(max_tokens)
-        tok = time.time()
+        tok = time.perf_counter()
         print(res["text"])
         throughput = max_tokens / (tok - tic)
         print(f"Throughput: {throughput} tokens/s")
diff --git a/test/srt/test_update_weights_from_distributed.py b/test/srt/test_update_weights_from_distributed.py
index e558a56e3..064406703 100644
--- a/test/srt/test_update_weights_from_distributed.py
+++ b/test/srt/test_update_weights_from_distributed.py
@@ -164,7 +164,7 @@ def init_process_hf(
     )
     dist.barrier(group=group, device_ids=[rank])
     torch.cuda.synchronize()
-    time_begin_broadcast = time.time()
+    time_begin_broadcast = time.perf_counter()
 
     # The last parameter is lm_head.weight, which is tied
     # with embed_tokens.weight. Actually, we only need
@@ -182,7 +182,7 @@ def init_process_hf(
             group=group,
         )
     torch.cuda.synchronize()
-    time_end_broadcast = time.time()
+    time_end_broadcast = time.perf_counter()
 
     # Measure the latency of broadcasting/weights update.
     broadcast_time = time_end_broadcast - time_begin_broadcast
@@ -282,7 +282,7 @@ def init_process_sgl(
         )
 
     torch.cuda.synchronize()
-    time_begin_update = time.time()
+    time_begin_update = time.perf_counter()
 
     # The last parameter is lm_head.weight, which is tied
     # with embed_tokens.weight. Actually, we only need
@@ -312,7 +312,7 @@ def init_process_sgl(
                 },
             )
     torch.cuda.synchronize()
-    time_end_update = time.time()
+    time_end_update = time.perf_counter()
 
     # Measure the latency of broadcast/weights update.
     update_time = time_end_update - time_begin_update
diff --git a/test/srt/test_update_weights_from_tensor.py b/test/srt/test_update_weights_from_tensor.py
index 1f3592447..38187652b 100644
--- a/test/srt/test_update_weights_from_tensor.py
+++ b/test/srt/test_update_weights_from_tensor.py
@@ -21,9 +21,9 @@ def test_update_weights_from_tensor(tp_size):
     memory_before = torch.cuda.memory_allocated()
     new_tensor = torch.full((16384, 2048), 1.5, device="cuda")
 
-    time_start = time.time()
+    time_start = time.perf_counter()
     engine.update_weights_from_tensor([(x, new_tensor) for x in param_names])
-    print(f"Time delta: {time.time() - time_start:.03f}")
+    print(f"Time delta: {time.perf_counter() - time_start:.03f}")
 
     for param_name in param_names[:3]:
         _check_param(engine, param_name, [1.5] * 5)
diff --git a/test/srt/test_w8a8_quantization.py b/test/srt/test_w8a8_quantization.py
index 2cb2fa073..3d4ce1afa 100644
--- a/test/srt/test_w8a8_quantization.py
+++ b/test/srt/test_w8a8_quantization.py
@@ -62,9 +62,9 @@ class TestW8A8(CustomTestCase):
     def test_throughput(self):
         max_tokens = 256
 
-        tic = time.time()
+        tic = time.perf_counter()
         res = self.run_decode(max_tokens)
-        tok = time.time()
+        tok = time.perf_counter()
         print(res["text"])
         throughput = max_tokens / (tok - tic)
         print(f"Throughput: {throughput} tokens/s")