Fix stop condition for <|eom_id|> (#1766)

This commit is contained in:
Lianmin Zheng
2024-10-23 10:47:12 -07:00
committed by GitHub
parent 9af7b88e3c
commit 80a905475d
2 changed files with 17 additions and 6 deletions

View File

@@ -163,6 +163,15 @@ def get_tokenizer(
"Using a slow tokenizer. This might cause a significant "
"slowdown. Consider using a fast tokenizer instead."
)
# Special handling for the <|eom_id|> stop token that Llama 3 generates during tool use.
if "<|eom_id|>" in tokenizer.get_added_vocab():
tokenizer.additional_stop_token_ids = set(
[tokenizer.get_added_vocab()["<|eom_id|>"]]
)
else:
tokenizer.additional_stop_token_ids = None
return tokenizer