Fix memory leak for chunked prefill 2 (#1858)

Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
This commit is contained in:
Lianmin Zheng
2024-10-31 14:51:51 -07:00
committed by GitHub
parent 8ce202a493
commit a2e0424abf
7 changed files with 138 additions and 30 deletions

View File

@@ -221,7 +221,7 @@ class Req:
self.prefix_indices = []
self.extend_input_len = 0
self.last_node = None
-self.is_inflight_req = 0
+self.is_being_chunked = 0
# Logprobs (arguments)
self.return_logprob = False
@@ -888,7 +888,7 @@ class ScheduleBatch:
def filter_batch(
self,
-current_inflight_req: Optional[Req] = None,
+being_chunked_req: Optional[Req] = None,
keep_indices: Optional[List[int]] = None,
):
if keep_indices is None:
@@ -896,7 +896,7 @@ class ScheduleBatch:
i
for i in range(len(self.reqs))
if not self.reqs[i].finished()
-and self.reqs[i] is not current_inflight_req
+and self.reqs[i] is not being_chunked_req
]
if keep_indices is None or len(keep_indices) == 0: