From 1bebd3154e3adb205cf1c64573cfca8d82536d0a Mon Sep 17 00:00:00 2001
From: Ziming Huang <1520787127@qq.com>
Date: Thu, 3 Jul 2025 13:31:49 +0800
Subject: [PATCH] Fix num_tokens_pre_allocated in disaggregation log (#7714)

---
 python/sglang/srt/disaggregation/decode.py | 6 ++++++
 python/sglang/srt/managers/scheduler.py    | 5 +----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/python/sglang/srt/disaggregation/decode.py b/python/sglang/srt/disaggregation/decode.py
index ea7f1cc0e..07e06b9f4 100644
--- a/python/sglang/srt/disaggregation/decode.py
+++ b/python/sglang/srt/disaggregation/decode.py
@@ -416,6 +416,12 @@ class DecodePreallocQueue:
 
         return preallocated_reqs
 
+    @property
+    def num_tokens_pre_allocated(self):
+        return sum(
+            len(decode_req.req.fill_ids) for decode_req in self.transfer_queue.queue
+        )
+
     def _allocatable_tokens(
         self, retractable_tokens: Optional[int] = None, count_retracted: bool = True
     ) -> int:
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index f7bd19f7a..8e910c0ee 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -707,9 +707,6 @@ class Scheduler(
                 transfer_backend=self.transfer_backend,
             )
 
-            # Metric for pre-allocation
-            self.num_tokens_pre_allocated = 0
-
         elif self.disaggregation_mode == DisaggregationMode.PREFILL:
             # *2 for the headroom.
             buffer_size = self.max_running_requests * 2
@@ -1372,7 +1369,7 @@ class Scheduler(
             msg += f"accept len: {spec_accept_length:.2f}, "
 
         if self.disaggregation_mode == DisaggregationMode.DECODE:
-            msg += f"pre-allocated usage: {self.num_tokens_pre_allocated / self.max_total_num_tokens:.2f}, "
+            msg += f"pre-allocated usage: {self.disagg_decode_prealloc_queue.num_tokens_pre_allocated / self.max_total_num_tokens:.2f}, "
             msg += f"#retracted-req: {len(self.disagg_decode_prealloc_queue.retracted_queue)}, "
 
         msg += (