Prevent memory leak of retract_decode when page_size > 1 (#4977)

This commit is contained in:
Zhiqiang Xie
2025-04-01 15:30:45 -07:00
committed by GitHub
parent fda6bb78da
commit 12047f5e94

View File

@@ -1220,10 +1220,8 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
 else:
 # TODO: apply more fine-grained retraction
 last_uncached_pos = (
-(len(req.prefix_indices) + server_args.page_size - 1)
-// server_args.page_size
-* server_args.page_size
-)
+len(req.prefix_indices) // server_args.page_size
+) * server_args.page_size
 token_indices = self.req_to_token_pool.req_to_token[
 req.req_pool_idx, last_uncached_pos : seq_lens_cpu[idx]
 ]