Fix memory leak for chunked prefill 2 (#1858)
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
This commit is contained in:
@@ -221,7 +221,7 @@ class Req:
|
||||
self.prefix_indices = []
|
||||
self.extend_input_len = 0
|
||||
self.last_node = None
|
||||
-self.is_inflight_req = 0
|
||||
+self.is_being_chunked = 0
|
||||
|
||||
# Logprobs (arguments)
|
||||
self.return_logprob = False
|
||||
@@ -888,7 +888,7 @@ class ScheduleBatch:
|
||||
|
||||
def filter_batch(
|
||||
self,
|
||||
-current_inflight_req: Optional[Req] = None,
|
||||
+being_chunked_req: Optional[Req] = None,
|
||||
keep_indices: Optional[List[int]] = None,
|
||||
):
|
||||
if keep_indices is None:
|
||||
@@ -896,7 +896,7 @@ class ScheduleBatch:
|
||||
i
|
||||
for i in range(len(self.reqs))
|
||||
if not self.reqs[i].finished()
|
||||
-and self.reqs[i] is not current_inflight_req
|
||||
+and self.reqs[i] is not being_chunked_req
|
||||
]
|
||||
|
||||
if keep_indices is None or len(keep_indices) == 0:
|
||||
|
||||
Reference in New Issue
Block a user