Log the timestamps of each prefill/decode iteration (#6094)
Co-authored-by: yuhsuan-t <12108766+yuhsaun-t@users.noreply.github.com>
This commit is contained in:
@@ -13,6 +13,7 @@
|
||||
# ==============================================================================
|
||||
"""A scheduler that manages a tensor parallel GPU worker."""
|
||||
|
||||
+ import datetime
|
||||
import faulthandler
|
||||
import logging
|
||||
import os
|
||||
@@ -1313,10 +1314,12 @@ class Scheduler(
|
||||
f += f"#unbootstrapped-req: {len(self.disagg_prefill_bootstrap_queue.queue)}, "
|
||||
f += f"#queue-req: {len(self.waiting_queue)}, "
|
||||
f += f"#transferring-req: {len(self.disagg_prefill_inflight_queue)}, "
|
||||
- f += f"input throughput (token/s): {self.last_input_throughput:.2f} "
|
||||
+ f += f"input throughput (token/s): {self.last_input_throughput:.2f}, "
|
||||
else:
|
||||
f += f"#running-req: {running_bs}, "
|
||||
- f += f"#queue-req: {len(self.waiting_queue)}"
|
||||
+ f += f"#queue-req: {len(self.waiting_queue)}, "
|
||||
|
||||
+ f += f"timestamp: {datetime.datetime.now().isoformat()}"
|
||||
|
||||
logger.info(f)
|
||||
|
||||
@@ -1378,7 +1381,8 @@ class Scheduler(
|
||||
msg += (
|
||||
f"cuda graph: {can_run_cuda_graph}, "
|
||||
f"gen throughput (token/s): {self.last_gen_throughput:.2f}, "
|
||||
- f"#queue-req: {len(self.waiting_queue)}"
|
||||
+ f"#queue-req: {len(self.waiting_queue)}, "
|
||||
+ f"timestamp: {datetime.datetime.now().isoformat()}"
|
||||
)
|
||||
|
||||
logger.info(msg)
|
||||
|
||||
Reference in New Issue
Block a user