Prevent memory leak of retract_decode when page_size > 1 (#4977)
This commit is contained in:
@@ -1220,10 +1220,8 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
 else:
     # TODO: apply more fine-grained retraction
     last_uncached_pos = (
-        (len(req.prefix_indices) + server_args.page_size - 1)
-        // server_args.page_size
-        * server_args.page_size
-    )
+        len(req.prefix_indices) // server_args.page_size
+    ) * server_args.page_size
     token_indices = self.req_to_token_pool.req_to_token[
         req.req_pool_idx, last_uncached_pos : seq_lens_cpu[idx]
     ]
Reference in New Issue
Block a user