Log the timestamps of each prefill/decode iteration (#6094)

Co-authored-by: yuhsuan-t <12108766+yuhsaun-t@users.noreply.github.com>
This commit is contained in:
yuhsuan-t
2025-07-06 18:57:27 -07:00
committed by GitHub
parent a3398d8478
commit 8d4a01cbd7

View File

@@ -13,6 +13,7 @@
# ==============================================================================
"""A scheduler that manages a tensor parallel GPU worker."""
import datetime
import faulthandler
import logging
import os
@@ -1313,10 +1314,12 @@ class Scheduler(
f += f"#unbootstrapped-req: {len(self.disagg_prefill_bootstrap_queue.queue)}, "
f += f"#queue-req: {len(self.waiting_queue)}, "
f += f"#transferring-req: {len(self.disagg_prefill_inflight_queue)}, "
f += f"input throughput (token/s): {self.last_input_throughput:.2f} "
f += f"input throughput (token/s): {self.last_input_throughput:.2f}, "
else:
f += f"#running-req: {running_bs}, "
f += f"#queue-req: {len(self.waiting_queue)}"
f += f"#queue-req: {len(self.waiting_queue)}, "
f += f"timestamp: {datetime.datetime.now().isoformat()}"
logger.info(f)
@@ -1378,7 +1381,8 @@ class Scheduler(
msg += (
f"cuda graph: {can_run_cuda_graph}, "
f"gen throughput (token/s): {self.last_gen_throughput:.2f}, "
f"#queue-req: {len(self.waiting_queue)}"
f"#queue-req: {len(self.waiting_queue)}, "
f"timestamp: {datetime.datetime.now().isoformat()}"
)
logger.info(msg)