Fix the perf regression due to additional_stop_token_ids (#1773)

This commit is contained in:
Lianmin Zheng
2024-10-23 16:45:21 -07:00
committed by GitHub
parent 05b3bf5e8e
commit 8f8f96a621
5 changed files with 20 additions and 16 deletions

View File

@@ -334,15 +334,20 @@ class Req:
last_token_id = self.output_ids[-1]
matched_eos = last_token_id in self.sampling_params.stop_token_ids
matched_eos = False
# Check stop token ids
if self.sampling_params.stop_token_ids:
matched_eos = last_token_id in self.sampling_params.stop_token_ids
if self.tokenizer is not None:
matched_eos |= last_token_id == self.tokenizer.eos_token_id
if self.tokenizer.additional_stop_token_ids:
matched_eos |= last_token_id in self.tokenizer.additional_stop_token_ids
if matched_eos and not self.sampling_params.ignore_eos:
self.finished_reason = FINISH_MATCHED_TOKEN(matched=last_token_id)
return
# Check stop strings
if len(self.sampling_params.stop_strs) > 0:
tail_str = self.tokenizer.decode(
self.output_ids[-(self.sampling_params.stop_str_max_len + 1) :]