Fix memory leak during abort (#2238)
This commit is contained in:
@@ -231,6 +231,7 @@ class Req:
|
||||
self.tokenizer = None
|
||||
self.finished_reason = None
|
||||
self.stream = False
|
||||
self.to_abort = False
|
||||
|
||||
# For incremental decoding
|
||||
# ----- | --------- read_ids -------|
|
||||
@@ -368,6 +369,10 @@ class Req:
|
||||
if self.finished():
|
||||
return
|
||||
|
||||
if self.to_abort:
|
||||
self.finished_reason = FINISH_ABORT()
|
||||
return
|
||||
|
||||
if len(self.output_ids) >= self.sampling_params.max_new_tokens:
|
||||
self.finished_reason = FINISH_LENGTH(
|
||||
length=self.sampling_params.max_new_tokens
|
||||
|
||||
Reference in New Issue
Block a user