Prevent memory leak of retract_decode when page_size > 1 (#4977)

This commit is contained in:
Zhiqiang Xie
2025-04-01 15:30:45 -07:00
committed by GitHub
parent fda6bb78da
commit 12047f5e94

View File

@@ -1220,10 +1220,8 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
 else:
     # TODO: apply more fine-grained retraction
     last_uncached_pos = (
-        (len(req.prefix_indices) + server_args.page_size - 1)
-        // server_args.page_size
-        * server_args.page_size
-    )
+        len(req.prefix_indices) // server_args.page_size
+    ) * server_args.page_size
     token_indices = self.req_to_token_pool.req_to_token[
         req.req_pool_idx, last_uncached_pos : seq_lens_cpu[idx]
     ]