From 3cf1473a095ccee4b8fd2e4dbb2b2af65283e03c Mon Sep 17 00:00:00 2001
From: Lifu Huang
Date: Sat, 17 May 2025 16:49:18 -0700
Subject: [PATCH] Use monotonic clock for interval measurement (#6211)

Signed-off-by: Lifu Huang
---
 .../usage/llava_video/srt_example_llava_v.py  |  8 +++---
 .../engine/fastapi_engine_inference.py        |  4 +--
 .../multimodal/llava_onevision_server.py      |  8 +++---
 python/sglang/bench_one_batch.py              |  8 +++---
 python/sglang/bench_one_batch_server.py       | 10 +++----
 python/sglang/compile_deep_gemm.py            |  8 +++---
 python/sglang/srt/distributed/utils.py        |  6 ++---
 python/sglang/srt/entrypoints/http_server.py  |  4 +--
 .../srt/entrypoints/http_server_engine.py     |  4 +--
 python/sglang/srt/managers/scheduler.py       | 26 +++++++++----------
 python/sglang/srt/mem_cache/hiradix_cache.py  |  8 +++---
 python/sglang/srt/mem_cache/radix_cache.py    | 10 +++----
 python/sglang/srt/metrics/collector.py        |  4 +--
 .../sglang/srt/model_executor/model_runner.py |  4 +--
 python/sglang/srt/speculative/eagle_worker.py |  4 +--
 python/sglang/srt/utils.py                    |  8 +++---
 python/sglang/test/test_utils.py              |  4 +--
 python/sglang/utils.py                        |  4 +--
 .../py_src/sglang_router/launch_server.py     |  4 +--
 test/srt/test_disaggregation.py               |  4 +--
 test/srt/test_disaggregation_different_tp.py  |  4 +--
 21 files changed, 72 insertions(+), 72 deletions(-)

diff --git a/examples/frontend_language/usage/llava_video/srt_example_llava_v.py b/examples/frontend_language/usage/llava_video/srt_example_llava_v.py
index bc56d4210..ec5b334b0 100644
--- a/examples/frontend_language/usage/llava_video/srt_example_llava_v.py
+++ b/examples/frontend_language/usage/llava_video/srt_example_llava_v.py
@@ -109,9 +109,9 @@ def batch(video_dir, save_dir, cur_chunk, num_chunks, num_frames=16, batch_size=
         for video_path in batch_video_files
     ]

-    start_time = time.time()
+    start_time = time.perf_counter()
     states = video_qa.run_batch(batch_input, max_new_tokens=512, temperature=0.2)
-    total_time = time.time() - start_time
+    total_time = time.perf_counter() - start_time
     average_time = total_time / len(batch_video_files)
     print(
         f"Number of videos in batch: {len(batch_video_files)}. Average processing time per video: {average_time:.2f} seconds. Total time for this batch: {total_time:.2f} seconds"
@@ -240,11 +240,11 @@ if __name__ == "__main__":
             for f in os.listdir(root)
             if f.endswith((".mp4", ".avi", ".mov"))
         ]  # Add more extensions if needed
-        start_time = time.time()  # Start time for processing a single video
+        start_time = time.perf_counter()  # Start time for processing a single video
         for cur_video in video_files[:1]:
             print(cur_video)
             single(cur_video, num_frames)
-        end_time = time.time()  # End time for processing a single video
+        end_time = time.perf_counter()  # End time for processing a single video
         total_time = end_time - start_time
         average_time = total_time / len(
             video_files
diff --git a/examples/runtime/engine/fastapi_engine_inference.py b/examples/runtime/engine/fastapi_engine_inference.py
index 57b83bcba..a755cf8d8 100644
--- a/examples/runtime/engine/fastapi_engine_inference.py
+++ b/examples/runtime/engine/fastapi_engine_inference.py
@@ -89,9 +89,9 @@ def start_server(args, timeout=60):

     process = subprocess.Popen(command, stdout=None, stderr=None)

-    start_time = time.time()
+    start_time = time.perf_counter()
     with requests.Session() as session:
-        while time.time() - start_time < timeout:
+        while time.perf_counter() - start_time < timeout:
             try:
                 # Check the /docs endpoint which FastAPI provides by default
                 response = session.get(
diff --git a/examples/runtime/multimodal/llava_onevision_server.py b/examples/runtime/multimodal/llava_onevision_server.py
index 94a0fee94..ee921b558 100644
--- a/examples/runtime/multimodal/llava_onevision_server.py
+++ b/examples/runtime/multimodal/llava_onevision_server.py
@@ -150,7 +150,7 @@ def video_stream_request_test(client, video_path):

 def image_speed_test(client):
     print("----------------------Image Speed Test----------------------")
-    start_time = time.time()
+    start_time = time.perf_counter()
     request = client.chat.completions.create(
         model="default",
         messages=[
@@ -173,7 +173,7 @@
         temperature=0,
         max_tokens=1024,
     )
-    end_time = time.time()
+    end_time = time.perf_counter()
     response = request.choices[0].message.content
     print(response)
     print("-" * 30)
@@ -184,14 +184,14 @@ def video_speed_test(client, video_path):
     print("------------------------Video Speed Test------------------------")
     messages = prepare_video_messages(video_path)

-    start_time = time.time()
+    start_time = time.perf_counter()
     video_request = client.chat.completions.create(
         model="default",
         messages=messages,
         temperature=0,
         max_tokens=1024,
     )
-    end_time = time.time()
+    end_time = time.perf_counter()
     video_response = video_request.choices[0].message.content
     print(video_response)
     print("-" * 30)
diff --git a/python/sglang/bench_one_batch.py b/python/sglang/bench_one_batch.py
index 1ddb36c48..3367cb752 100644
--- a/python/sglang/bench_one_batch.py
+++ b/python/sglang/bench_one_batch.py
@@ -373,10 +373,10 @@ def latency_test_run_once(

     # Prefill
     synchronize(device)
-    tic = time.time()
+    tic = time.perf_counter()
     next_token_ids, _, batch = extend(reqs, model_runner)
     synchronize(device)
-    prefill_latency = time.time() - tic
+    prefill_latency = time.perf_counter() - tic
     tot_latency += prefill_latency
     throughput = input_len * batch_size / prefill_latency
     rank_print(
@@ -389,10 +389,10 @@
     decode_latencies = []
     for i in range(output_len - 1):
         synchronize(device)
-        tic = time.time()
+        tic = time.perf_counter()
         next_token_ids, _ = decode(next_token_ids, batch, model_runner)
         synchronize(device)
-        latency = time.time() - tic
+        latency = time.perf_counter() - tic
         tot_latency += latency
         throughput = batch_size / latency
         decode_latencies.append(latency)
diff --git a/python/sglang/bench_one_batch_server.py b/python/sglang/bench_one_batch_server.py
index 73ee8dc9f..da091bf98 100644
--- a/python/sglang/bench_one_batch_server.py
+++ b/python/sglang/bench_one_batch_server.py
@@ -92,8 +92,8 @@ def launch_server_process(server_args: ServerArgs):
     base_url = f"http://{server_args.host}:{server_args.port}"
     timeout = 600

-    start_time = time.time()
-    while time.time() - start_time < timeout:
+    start_time = time.perf_counter()
+    while time.perf_counter() - start_time < timeout:
         try:
             headers = {
                 "Content-Type": "application/json; charset=utf-8",
@@ -141,7 +141,7 @@ def run_one_case(
     else:
         json_schema = None

-    tic = time.time()
+    tic = time.perf_counter()
     response = requests.post(
         url + "/generate",
         json={
@@ -175,9 +175,9 @@
             or data["meta_info"]["finish_reason"]["type"] == "length"
         )
         if data["meta_info"]["completion_tokens"] == 1:
-            ttft = time.time() - tic
+            ttft = time.perf_counter() - tic

-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
     input_throughput = batch_size * input_len / ttft
     output_throughput = batch_size * output_len / (latency - ttft)
     overall_throughput = batch_size * (input_len + output_len) / latency
diff --git a/python/sglang/compile_deep_gemm.py b/python/sglang/compile_deep_gemm.py
index 84b52962f..1a17fad89 100644
--- a/python/sglang/compile_deep_gemm.py
+++ b/python/sglang/compile_deep_gemm.py
@@ -82,8 +82,8 @@ def launch_server_process_and_send_one_request(
     base_url = f"http://{server_args.host}:{server_args.port}"
     timeout = compile_args.timeout

-    start_time = time.time()
-    while time.time() - start_time < timeout:
+    start_time = time.perf_counter()
+    while time.perf_counter() - start_time < timeout:
         try:
             headers = {
                 "Content-Type": "application/json; charset=utf-8",
@@ -112,9 +112,9 @@
             raise RuntimeError(f"Sync request failed: {error}")
     # Other nodes should wait for the exit signal from Rank-0 node.
     else:
-        start_time_waiting = time.time()
+        start_time_waiting = time.perf_counter()
         while proc.is_alive():
-            if time.time() - start_time_waiting < timeout:
+            if time.perf_counter() - start_time_waiting < timeout:
                 time.sleep(10)
             else:
                 raise TimeoutError("Waiting for main node timeout!")
diff --git a/python/sglang/srt/distributed/utils.py b/python/sglang/srt/distributed/utils.py
index e117aa30d..bfe54b9d4 100644
--- a/python/sglang/srt/distributed/utils.py
+++ b/python/sglang/srt/distributed/utils.py
@@ -127,14 +127,14 @@ class StatelessProcessGroup:
             key = f"send_to/{dst}/{self.send_dst_counter[dst]}"
             self.store.set(key, pickle.dumps(obj))
             self.send_dst_counter[dst] += 1
-            self.entries.append((key, time.time()))
+            self.entries.append((key, time.perf_counter()))

     def expire_data(self):
         """Expire data that is older than `data_expiration_seconds` seconds."""
         while self.entries:
             # check the oldest entry
             key, timestamp = self.entries[0]
-            if time.time() - timestamp > self.data_expiration_seconds:
+            if time.perf_counter() - timestamp > self.data_expiration_seconds:
                 self.store.delete_key(key)
                 self.entries.popleft()
             else:
@@ -158,7 +158,7 @@ class StatelessProcessGroup:
             key = f"broadcast_from/{src}/" f"{self.broadcast_send_counter}"
             self.store.set(key, pickle.dumps(obj))
             self.broadcast_send_counter += 1
-            self.entries.append((key, time.time()))
+            self.entries.append((key, time.perf_counter()))
             return obj
         else:
             key = f"broadcast_from/{src}/" f"{self.broadcast_recv_src_counter[src]}"
diff --git a/python/sglang/srt/entrypoints/http_server.py b/python/sglang/srt/entrypoints/http_server.py
index 053107a02..b1363b6c7 100644
--- a/python/sglang/srt/entrypoints/http_server.py
+++ b/python/sglang/srt/entrypoints/http_server.py
@@ -182,9 +182,9 @@ async def health_generate(request: Request) -> Response:
         async for _ in _global_state.tokenizer_manager.generate_request(gri, request):
             break

-    tic = time.time()
+    tic = time.perf_counter()
     task = asyncio.create_task(gen())
-    while time.time() < tic + HEALTH_CHECK_TIMEOUT:
+    while time.perf_counter() < tic + HEALTH_CHECK_TIMEOUT:
         await asyncio.sleep(1)
         if _global_state.tokenizer_manager.last_receive_tstamp > tic:
             task.cancel()
diff --git a/python/sglang/srt/entrypoints/http_server_engine.py b/python/sglang/srt/entrypoints/http_server_engine.py
index f50e13f3c..ace569e56 100644
--- a/python/sglang/srt/entrypoints/http_server_engine.py
+++ b/python/sglang/srt/entrypoints/http_server_engine.py
@@ -24,10 +24,10 @@ def launch_server_process(server_args: ServerArgs) -> multiprocessing.Process:
     base_url = server_args.url()
     timeout = 300.0  # Increased timeout to 5 minutes for downloading large models

-    start_time = time.time()
+    start_time = time.perf_counter()

     with requests.Session() as session:
-        while time.time() - start_time < timeout:
+        while time.perf_counter() - start_time < timeout:
             try:
                 headers = {
                     "Content-Type": "application/json; charset=utf-8",
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index a6b614bdc..0b9a1dd15 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -348,8 +348,8 @@ class Scheduler(
         self.forward_ct_decode = 0
         self.num_generated_tokens = 0
         self.num_prefill_tokens = 0
-        self.last_decode_stats_tic = time.time()
-        self.last_prefill_stats_tic = time.time()
+        self.last_decode_stats_tic = time.perf_counter()
+        self.last_prefill_stats_tic = time.perf_counter()
         self.return_health_check_ct = 0
         self.current_stream = torch.get_device_module(self.device).current_stream()
         if self.device == "cpu":
@@ -1032,13 +1032,13 @@ class Scheduler(
                 add_to_grammar_queue = True

         if add_to_grammar_queue:
-            req.queue_time_start = time.time()
+            req.queue_time_start = time.perf_counter()
             self.grammar_queue.append(req)
         else:
             self._add_request_to_queue(req)

     def _add_request_to_queue(self, req: Req):
-        req.queue_time_start = time.time()
+        req.queue_time_start = time.perf_counter()
         if self.disaggregation_mode == DisaggregationMode.PREFILL:
             self.disagg_prefill_bootstrap_queue.add(req)
         elif self.disaggregation_mode == DisaggregationMode.DECODE:
@@ -1085,7 +1085,7 @@ class Scheduler(
             req.finished_reason = FINISH_ABORT(
                 error_msg, HTTPStatus.BAD_REQUEST, "BadRequestError"
             )
-            req.queue_time_start = time.time()
+            req.queue_time_start = time.perf_counter()
             self.waiting_queue.append(req)
             return

@@ -1109,8 +1109,8 @@ class Scheduler(
         can_run_list: List[Req],
         running_bs: int,
     ):
-        gap_latency = time.time() - self.last_prefill_stats_tic
-        self.last_prefill_stats_tic = time.time()
+        gap_latency = time.perf_counter() - self.last_prefill_stats_tic
+        self.last_prefill_stats_tic = time.perf_counter()
         self.last_input_throughput = self.num_prefill_tokens / gap_latency
         self.num_prefill_tokens = 0

@@ -1160,8 +1160,8 @@ class Scheduler(
     ):
         batch = running_batch or self.running_batch

-        gap_latency = time.time() - self.last_decode_stats_tic
-        self.last_decode_stats_tic = time.time()
+        gap_latency = time.perf_counter() - self.last_decode_stats_tic
+        self.last_decode_stats_tic = time.perf_counter()
         self.last_gen_throughput = self.num_generated_tokens / gap_latency
         self.num_generated_tokens = 0
         num_running_reqs = len(batch.reqs)
@@ -1245,7 +1245,7 @@ class Scheduler(
         if (
             self.enable_metrics
             and self.attn_tp_rank == 0
-            and time.time() > self.metrics_collector.last_log_time + 30
+            and time.perf_counter() > self.metrics_collector.last_log_time + 30
         ):
             # During idle time, also collect metrics every 30 seconds.
             num_used = self.max_total_num_tokens - (
@@ -1410,7 +1410,7 @@ class Scheduler(
         if self.enable_metrics:
             # only record queue time when enable_metrics is True to avoid overhead
             for req in can_run_list:
-                req.queue_time_end = time.time()
+                req.queue_time_end = time.perf_counter()

         self.waiting_queue = [
             x for x in self.waiting_queue if x not in set(can_run_list)
@@ -1783,10 +1783,10 @@ class Scheduler(
     def watchdog_thread(self):
         """A watch dog thread that will try to kill the server itself if one forward batch takes too long."""
         self.watchdog_last_forward_ct = 0
-        self.watchdog_last_time = time.time()
+        self.watchdog_last_time = time.perf_counter()

         while True:
-            current = time.time()
+            current = time.perf_counter()
             if self.cur_batch is not None:
                 if self.watchdog_last_forward_ct == self.forward_ct:
                     if current > self.watchdog_last_time + self.watchdog_timeout:
diff --git a/python/sglang/srt/mem_cache/hiradix_cache.py b/python/sglang/srt/mem_cache/hiradix_cache.py
index 1e720844a..4bec901aa 100644
--- a/python/sglang/srt/mem_cache/hiradix_cache.py
+++ b/python/sglang/srt/mem_cache/hiradix_cache.py
@@ -335,13 +335,13 @@ class HiRadixCache(RadixCache):
         return value, last_node

     def _match_prefix_helper(self, node: TreeNode, key: List):
-        node.last_access_time = time.time()
+        node.last_access_time = time.monotonic()
         child_key = self.get_child_key_fn(key)
         value = []

         while len(key) > 0 and child_key in node.children.keys():
             child = node.children[child_key]
-            child.last_access_time = time.time()
+            child.last_access_time = time.monotonic()
             prefix_len = self.key_match_fn(child.key, key)
             if prefix_len < len(child.key):
                 new_node = self._split_node(child.key, child, prefix_len)
@@ -386,7 +386,7 @@ class HiRadixCache(RadixCache):
         return new_node

     def _insert_helper(self, node: TreeNode, key: List, value):
-        node.last_access_time = time.time()
+        node.last_access_time = time.monotonic()
         if len(key) == 0:
             return 0

@@ -395,7 +395,7 @@ class HiRadixCache(RadixCache):

         while len(key) > 0 and child_key in node.children.keys():
             node = node.children[child_key]
-            node.last_access_time = time.time()
+            node.last_access_time = time.monotonic()
             prefix_len = self.key_match_fn(node.key, key)

             if prefix_len == len(node.key):
diff --git a/python/sglang/srt/mem_cache/radix_cache.py b/python/sglang/srt/mem_cache/radix_cache.py
index dd608c154..b1fd645be 100644
--- a/python/sglang/srt/mem_cache/radix_cache.py
+++ b/python/sglang/srt/mem_cache/radix_cache.py
@@ -45,7 +45,7 @@ class TreeNode:
         self.key = None
         self.value = None
         self.lock_ref = 0
-        self.last_access_time = time.time()
+        self.last_access_time = time.monotonic()

         self.hit_count = 0
         # indicating the node is loading KV cache from host
@@ -322,14 +322,14 @@ class RadixCache(BasePrefixCache):
     ##### Internal Helper Functions #####

     def _match_prefix_helper(self, node: TreeNode, key: List):
-        node.last_access_time = time.time()
+        node.last_access_time = time.monotonic()

         child_key = self.get_child_key_fn(key)

         value = []
         while len(key) > 0 and child_key in node.children.keys():
             child = node.children[child_key]
-            child.last_access_time = time.time()
+            child.last_access_time = time.monotonic()
             prefix_len = self.key_match_fn(child.key, key)
             if prefix_len < len(child.key):
                 new_node = self._split_node(child.key, child, prefix_len)
@@ -361,7 +361,7 @@ class RadixCache(BasePrefixCache):
         return new_node

     def _insert_helper(self, node: TreeNode, key: List, value):
-        node.last_access_time = time.time()
+        node.last_access_time = time.monotonic()
         if len(key) == 0:
             return 0

@@ -370,7 +370,7 @@ class RadixCache(BasePrefixCache):
         total_prefix_length = 0
         while len(key) > 0 and child_key in node.children.keys():
             node = node.children[child_key]
-            node.last_access_time = time.time()
+            node.last_access_time = time.monotonic()
             prefix_len = self.key_match_fn(node.key, key)
             total_prefix_length += prefix_len
             key = key[prefix_len:]
diff --git a/python/sglang/srt/metrics/collector.py b/python/sglang/srt/metrics/collector.py
index aa407e0ec..1c4b819f8 100644
--- a/python/sglang/srt/metrics/collector.py
+++ b/python/sglang/srt/metrics/collector.py
@@ -154,7 +154,7 @@ class SchedulerMetricsCollector:
         from prometheus_client import Counter, Gauge

         self.labels = labels
-        self.last_log_time = time.time()
+        self.last_log_time = time.perf_counter()

         self.num_running_reqs = Gauge(
             name="sglang:num_running_reqs",
@@ -294,7 +294,7 @@ class SchedulerMetricsCollector:
             self.num_decode_transfer_queue_reqs, stats.num_decode_transfer_queue_reqs
         )

-        self.last_log_time = time.time()
+        self.last_log_time = time.perf_counter()


 class TokenizerMetricsCollector:
diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index 3846a283d..c246fd82d 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -1019,7 +1019,7 @@ class ModelRunner:
         if self.server_args.disable_cuda_graph:
             return

-        tic = time.time()
+        tic = time.perf_counter()
         before_mem = get_available_gpu_memory(self.device, self.gpu_id)
         logger.info(
             f"Capture cuda graph begin. This can take up to several minutes. avail mem={before_mem:.2f} GB"
@@ -1027,7 +1027,7 @@ class ModelRunner:
         self.cuda_graph_runner = CudaGraphRunner(self)
         after_mem = get_available_gpu_memory(self.device, self.gpu_id)
         logger.info(
-            f"Capture cuda graph end. Time elapsed: {time.time() - tic:.2f} s. "
+            f"Capture cuda graph end. Time elapsed: {time.perf_counter() - tic:.2f} s. "
             f"mem usage={(before_mem - after_mem):.2f} GB. avail mem={after_mem:.2f} GB."
         )

diff --git a/python/sglang/srt/speculative/eagle_worker.py b/python/sglang/srt/speculative/eagle_worker.py
index 08904dbfc..ebbff0e8f 100644
--- a/python/sglang/srt/speculative/eagle_worker.py
+++ b/python/sglang/srt/speculative/eagle_worker.py
@@ -228,7 +228,7 @@ class EAGLEWorker(TpModelWorker):
             return

         # Capture draft
-        tic = time.time()
+        tic = time.perf_counter()
         before_mem = get_available_gpu_memory(self.device, self.gpu_id)
         logger.info(
             f"Capture draft cuda graph begin. This can take up to several minutes. avail mem={before_mem:.2f} GB"
@@ -236,7 +236,7 @@ class EAGLEWorker(TpModelWorker):
         self.cuda_graph_runner = EAGLEDraftCudaGraphRunner(self)
         after_mem = get_available_gpu_memory(self.device, self.gpu_id)
         logger.info(
-            f"Capture draft cuda graph end. Time elapsed: {time.time() - tic:.2f} s. avail mem={after_mem:.2f} GB. mem usage={(before_mem - after_mem):.2f} GB."
+            f"Capture draft cuda graph end. Time elapsed: {time.perf_counter() - tic:.2f} s. avail mem={after_mem:.2f} GB. mem usage={(before_mem - after_mem):.2f} GB."
         )

         # Capture extend
diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py
index 766e3bf3e..e82408fa5 100644
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -246,7 +246,7 @@ def mark_start(name, interval=0.1, color=0, indent=0):
         torch.cuda.synchronize()
     if time_infos.get(name, None) is None:
         time_infos[name] = TimeInfo(name, interval, color, indent)
-    time_infos[name].acc_time -= time.time()
+    time_infos[name].acc_time -= time.perf_counter()


 def mark_end(name):
@@ -254,7 +254,7 @@
     if not show_time_cost:
         return
     torch.cuda.synchronize()
-    time_infos[name].acc_time += time.time()
+    time_infos[name].acc_time += time.perf_counter()
     if time_infos[name].check():
         time_infos[name].pretty_print()

@@ -264,11 +264,11 @@ def calculate_time(show=False, min_cost_ms=0.0):
         def inner_func(*args, **kwargs):
             torch.cuda.synchronize()
             if show:
-                start_time = time.time()
+                start_time = time.perf_counter()
             result = func(*args, **kwargs)
             torch.cuda.synchronize()
             if show:
-                cost_time = (time.time() - start_time) * 1000
+                cost_time = (time.perf_counter() - start_time) * 1000
                 if cost_time > min_cost_ms:
                     print(f"Function {func.__name__} took {cost_time} ms to run.")
             return result
diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py
index 6cc0717b0..22a6a47e7 100644
--- a/python/sglang/test/test_utils.py
+++ b/python/sglang/test/test_utils.py
@@ -526,9 +526,9 @@ def popen_launch_pd_server(
     else:
         process = subprocess.Popen(command, stdout=None, stderr=None, env=env)

-    start_time = time.time()
+    start_time = time.perf_counter()
     with requests.Session() as session:
-        while time.time() - start_time < timeout:
+        while time.perf_counter() - start_time < timeout:
             try:
                 headers = {
                     "Content-Type": "application/json; charset=utf-8",
diff --git a/python/sglang/utils.py b/python/sglang/utils.py
index e83aa112b..1d994c3b5 100644
--- a/python/sglang/utils.py
+++ b/python/sglang/utils.py
@@ -436,7 +436,7 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
         base_url: The base URL of the server
         timeout: Maximum time to wait in seconds. None means wait forever.
""" - start_time = time.time() + start_time = time.perf_counter() while True: try: response = requests.get( @@ -455,7 +455,7 @@ def wait_for_server(base_url: str, timeout: int = None) -> None: ) break - if timeout and time.time() - start_time > timeout: + if timeout and time.perf_counter() - start_time > timeout: raise TimeoutError("Server did not become ready within timeout period") except requests.exceptions.RequestException: time.sleep(1) diff --git a/sgl-router/py_src/sglang_router/launch_server.py b/sgl-router/py_src/sglang_router/launch_server.py index 74353c21e..070fb9486 100644 --- a/sgl-router/py_src/sglang_router/launch_server.py +++ b/sgl-router/py_src/sglang_router/launch_server.py @@ -91,10 +91,10 @@ def launch_server_process( def wait_for_server_health(host: str, port: int, timeout: int = 300) -> bool: """Wait for server to be healthy by checking /health endpoint.""" - start_time = time.time() + start_time = time.perf_counter() url = f"http://{host}:{port}/health" - while time.time() - start_time < timeout: + while time.perf_counter() - start_time < timeout: try: response = requests.get(url, timeout=5) if response.status_code == 200: diff --git a/test/srt/test_disaggregation.py b/test/srt/test_disaggregation.py index e3008598a..c46a0c29c 100644 --- a/test/srt/test_disaggregation.py +++ b/test/srt/test_disaggregation.py @@ -97,7 +97,7 @@ class TestDisaggregationMooncake(CustomTestCase): @classmethod def wait_server_ready(cls, url, timeout=60): - start_time = time.time() + start_time = time.perf_counter() while True: try: response = requests.get(url) @@ -107,7 +107,7 @@ class TestDisaggregationMooncake(CustomTestCase): except Exception: pass - if time.time() - start_time > timeout: + if time.perf_counter() - start_time > timeout: raise RuntimeError(f"Server {url} failed to start in {timeout}s") time.sleep(1) diff --git a/test/srt/test_disaggregation_different_tp.py b/test/srt/test_disaggregation_different_tp.py index 116fdb175..9b045dd10 100644 --- a/test/srt/test_disaggregation_different_tp.py +++ b/test/srt/test_disaggregation_different_tp.py @@ -102,7 +102,7 @@ class TestDisaggregationMooncakeDifferentTP(CustomTestCase): @classmethod def wait_server_ready(cls, url, timeout=60): - start_time = time.time() + start_time = time.perf_counter() while True: try: response = requests.get(url) @@ -112,7 +112,7 @@ class TestDisaggregationMooncakeDifferentTP(CustomTestCase): except Exception: pass - if time.time() - start_time > timeout: + if time.perf_counter() - start_time > timeout: raise RuntimeError(f"Server {url} failed to start in {timeout}s") time.sleep(1)