Fix num_tokens_pre_allocated in disaggregation log (#7714)

This commit is contained in:
Ziming Huang
2025-07-03 13:31:49 +08:00
committed by GitHub
parent d3c275b117
commit 1bebd3154e
2 changed files with 7 additions and 4 deletions

View File

@@ -416,6 +416,12 @@ class DecodePreallocQueue:
return preallocated_reqs
@property
def num_tokens_pre_allocated(self):
    """Return the summed length of ``fill_ids`` over the transfer queue.

    The value is recomputed on every access by walking
    ``self.transfer_queue.queue`` and adding ``len(entry.req.fill_ids)``
    for each queued entry; an empty queue yields 0.
    """
    token_total = 0
    for queued_entry in self.transfer_queue.queue:
        token_total += len(queued_entry.req.fill_ids)
    return token_total
def _allocatable_tokens(
self, retractable_tokens: Optional[int] = None, count_retracted: bool = True
) -> int:

View File

@@ -707,9 +707,6 @@ class Scheduler(
transfer_backend=self.transfer_backend,
)
# Metric for pre-allocation
self.num_tokens_pre_allocated = 0
elif self.disaggregation_mode == DisaggregationMode.PREFILL:
# *2 for the headroom.
buffer_size = self.max_running_requests * 2
@@ -1372,7 +1369,7 @@ class Scheduler(
msg += f"accept len: {spec_accept_length:.2f}, "
if self.disaggregation_mode == DisaggregationMode.DECODE:
msg += f"pre-allocated usage: {self.num_tokens_pre_allocated / self.max_total_num_tokens:.2f}, "
msg += f"pre-allocated usage: {self.disagg_decode_prealloc_queue.num_tokens_pre_allocated / self.max_total_num_tokens:.2f}, "
msg += f"#retracted-req: {len(self.disagg_decode_prealloc_queue.retracted_queue)}, "
msg += (