From 12047f5e942e55150a5f6a96457e18768bf9c913 Mon Sep 17 00:00:00 2001
From: Zhiqiang Xie
Date: Tue, 1 Apr 2025 15:30:45 -0700
Subject: [PATCH] Prevent memory leak of retract_decode when page_size > 1 (#4977)

---
 python/sglang/srt/managers/schedule_batch.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py
index 991ec0551..a8796cb42 100644
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -1220,10 +1220,8 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
             else:
                 # TODO: apply more fine-grained retraction
                 last_uncached_pos = (
-                    (len(req.prefix_indices) + server_args.page_size - 1)
-                    // server_args.page_size
-                    * server_args.page_size
-                )
+                    len(req.prefix_indices) // server_args.page_size
+                ) * server_args.page_size
                 token_indices = self.req_to_token_pool.req_to_token[
                     req.req_pool_idx, last_uncached_pos : seq_lens_cpu[idx]
                 ]