Revert "Fix memory leak when doing chunked prefill" (#1797)

This commit is contained in:
Lianmin Zheng
2024-10-25 10:24:44 -07:00
committed by GitHub
parent 40900baea7
commit c555ce2ca2
6 changed files with 69 additions and 183 deletions

View File

@@ -222,7 +222,7 @@ class Req:
self.prefix_indices = []
self.extend_input_len = 0
self.last_node = None
-self.is_being_chunked = False
+self.is_inflight_req = 0
# Logprobs (arguments)
self.return_logprob = False
@@ -906,14 +906,15 @@ class ScheduleBatch:
def filter_batch(
self,
-being_chunked_req: Optional[Req] = None,
+current_inflight_req: Optional[Req] = None,
keep_indices: Optional[List[int]] = None,
):
if keep_indices is None:
keep_indices = [
i
for i in range(len(self.reqs))
-if not self.reqs[i].finished() and self.reqs[i] is not being_chunked_req
+if not self.reqs[i].finished()
+and self.reqs[i] is not current_inflight_req
]
if keep_indices is None or len(keep_indices) == 0: