Expose max total num tokens from Runtime & Engine API (#2092)

2024-11-22 15:10:10 -08:00
parent 72f87b723b
commit c35cd1f8c7
4 changed files with 81 additions and 7 deletions
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -1400,7 +1400,9 @@ def run_scheduler_process(

    try:
        scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank)
-        pipe_writer.send("ready")
+        pipe_writer.send(
+            {"status": "ready", "max_total_num_tokens": scheduler.max_total_num_tokens}
+        )
        if scheduler.enable_overlap:
            scheduler.event_loop_overlap()
        else: