Fix the perf regression due to additional_stop_token_ids (#1773)

Author: Lianmin Zheng (committed by GitHub)
Date:   2024-10-23 16:45:21 -07:00
Parent: 05b3bf5e8e
Commit: 8f8f96a621

5 changed files with 20 additions and 16 deletions

@@ -164,7 +164,7 @@ def get_tokenizer(
             "slowdown. Consider using a fast tokenizer instead."
         )
-    handle_additional_stop_token_ids(tokenizer)
+    attach_additional_stop_token_ids(tokenizer)
     return tokenizer
@@ -184,11 +184,11 @@ def get_processor(
         **kwargs,
     )
-    handle_additional_stop_token_ids(processor.tokenizer)
+    attach_additional_stop_token_ids(processor.tokenizer)
     return processor


-def handle_additional_stop_token_ids(tokenizer):
+def attach_additional_stop_token_ids(tokenizer):
     # Special handling for stop token <|eom_id|> generated by llama 3 tool use.
     if "<|eom_id|>" in tokenizer.get_added_vocab():
         tokenizer.additional_stop_token_ids = set(
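The diff above is cut off mid-expression at the `set(` call. Below is a minimal sketch of how the helper plausibly completes, together with a hypothetical consumer illustrating why the commit fixes the regression: `tokenizer.get_added_vocab()` rebuilds a dict of added tokens on each call in Hugging Face transformers, so invoking it on every decoding step is expensive, whereas this change computes the ids once at tokenizer load time and leaves only a cheap set-membership test in the hot path. The `else` branch and the `is_stop_token()` helper are assumptions for illustration, not code confirmed by this commit.

```python
# Sketch of the renamed helper, assuming the truncated set(...) call collects
# the id of <|eom_id|> and that the attribute defaults to None otherwise.
def attach_additional_stop_token_ids(tokenizer):
    # Special handling for stop token <|eom_id|> generated by llama 3 tool use.
    # get_added_vocab() builds a fresh dict of added tokens, so it is called
    # once here, at tokenizer load time, not on every decoding step.
    if "<|eom_id|>" in tokenizer.get_added_vocab():
        tokenizer.additional_stop_token_ids = set(
            [tokenizer.get_added_vocab()["<|eom_id|>"]]
        )
    else:
        tokenizer.additional_stop_token_ids = None


# Hypothetical hot-path consumer: tests the precomputed set instead of
# re-deriving it from get_added_vocab() per generated token.
def is_stop_token(tokenizer, token_id: int) -> bool:
    ids = getattr(tokenizer, "additional_stop_token_ids", None)
    return ids is not None and token_id in ids
```

The other changed files presumably switch their per-token stop checks over to the attached attribute, which is where the 20 additions and 16 deletions in the commit stats would come from.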