Fix memory leak during abort (#2238)

This commit is contained in:
Lianmin Zheng
2024-11-28 02:22:15 -08:00
committed by GitHub
parent d4fc1a70e3
commit b2ccf36d4d
6 changed files with 84 additions and 7 deletions

View File

@@ -231,6 +231,7 @@ class Req:
self.tokenizer = None
self.finished_reason = None
self.stream = False
self.to_abort = False
# For incremental decoding
# ----- | --------- read_ids -------|
@@ -368,6 +369,10 @@ class Req:
if self.finished():
return
if self.to_abort:
self.finished_reason = FINISH_ABORT()
return
if len(self.output_ids) >= self.sampling_params.max_new_tokens:
self.finished_reason = FINISH_LENGTH(
length=self.sampling_params.max_new_tokens