Prevent memory leak of retract_decode when page_size > 1 (#4977)
This commit is contained in:
@@ -1220,10 +1220,8 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
 else:
     # TODO: apply more fine-grained retraction
     last_uncached_pos = (
-        (len(req.prefix_indices) + server_args.page_size - 1)
-        // server_args.page_size
-        * server_args.page_size
-    )
+        len(req.prefix_indices) // server_args.page_size
+    ) * server_args.page_size
     token_indices = self.req_to_token_pool.req_to_token[
         req.req_pool_idx, last_uncached_pos : seq_lens_cpu[idx]
     ]
Reference in New Issue
Block a user