Fix missing additional_stop_token_ids (#1769)
This commit is contained in:
@@ -164,14 +164,7 @@ def get_tokenizer(
|
||||
"slowdown. Consider using a fast tokenizer instead."
|
||||
)
|
||||
|
||||
# Special handling for stop token <|eom_id|> generated by llama 3 tool use.
|
||||
if "<|eom_id|>" in tokenizer.get_added_vocab():
|
||||
tokenizer.additional_stop_token_ids = set(
|
||||
[tokenizer.get_added_vocab()["<|eom_id|>"]]
|
||||
)
|
||||
else:
|
||||
tokenizer.additional_stop_token_ids = None
|
||||
|
||||
handle_additional_stop_token_ids(tokenizer)
|
||||
return tokenizer
|
||||
|
||||
|
||||
@@ -190,4 +183,16 @@ def get_processor(
|
||||
tokenizer_revision=tokenizer_revision,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
handle_additional_stop_token_ids(processor.tokenizer)
|
||||
return processor
|
||||
|
||||
|
||||
def handle_additional_stop_token_ids(tokenizer):
    """Populate ``tokenizer.additional_stop_token_ids`` in place.

    Special handling for the stop token <|eom_id|> generated by llama 3
    tool use: if the tokenizer's added vocabulary contains it, record its
    token id as an extra stop id; otherwise set the attribute to None so
    downstream code can distinguish "checked, none found".

    Args:
        tokenizer: a tokenizer exposing ``get_added_vocab() -> dict[str, int]``
            (e.g. a HuggingFace tokenizer). Mutated in place; returns None.
    """
    # Fetch once: HF tokenizers rebuild the added-vocab dict on every call.
    added_vocab = tokenizer.get_added_vocab()
    if "<|eom_id|>" in added_vocab:
        tokenizer.additional_stop_token_ids = {added_vocab["<|eom_id|>"]}
    else:
        tokenizer.additional_stop_token_ids = None
|
||||
|
||||
Reference in New Issue
Block a user