diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 72c04a8ca..e580b502f 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -9,5 +9,5 @@
 /python/sglang/srt/models @merrymercy @Ying1123 @hnyls2002 @zhyncs @ispobock @ByronHsu
 /python/sglang/srt/openai_api @merrymercy @Ying1123 @hnyls2002 @zhyncs @ispobock @ByronHsu
 /python/sglang/srt/sampling @merrymercy @hnyls2002
-/test/lang @merrymercy @Ying1123 @hnyls2002 @ByronHsu
-/test/srt @merrymercy @Ying1123 @hnyls2002 @zhyncs @ispobock @ByronHsu
+/test/lang @merrymercy @Ying1123 @ByronHsu
+/test/srt @merrymercy @Ying1123 @zhyncs
diff --git a/python/sglang/srt/hf_transformers_utils.py b/python/sglang/srt/hf_transformers_utils.py
index 0bb4872fe..56d7c8a1f 100644
--- a/python/sglang/srt/hf_transformers_utils.py
+++ b/python/sglang/srt/hf_transformers_utils.py
@@ -164,14 +164,7 @@ def get_tokenizer(
             "slowdown. Consider using a fast tokenizer instead."
         )
 
-    # Special handling for stop token <|eom_id|> generated by llama 3 tool use.
-    if "<|eom_id|>" in tokenizer.get_added_vocab():
-        tokenizer.additional_stop_token_ids = set(
-            [tokenizer.get_added_vocab()["<|eom_id|>"]]
-        )
-    else:
-        tokenizer.additional_stop_token_ids = None
-
+    handle_additional_stop_token_ids(tokenizer)
     return tokenizer
 
 
@@ -190,4 +183,16 @@ def get_processor(
         tokenizer_revision=tokenizer_revision,
         **kwargs,
     )
+
+    handle_additional_stop_token_ids(processor.tokenizer)
     return processor
+
+
+def handle_additional_stop_token_ids(tokenizer):
+    # Special handling for stop token <|eom_id|> generated by llama 3 tool use.
+    if "<|eom_id|>" in tokenizer.get_added_vocab():
+        tokenizer.additional_stop_token_ids = set(
+            [tokenizer.get_added_vocab()["<|eom_id|>"]]
+        )
+    else:
+        tokenizer.additional_stop_token_ids = None
diff --git a/python/sglang/srt/sampling/sampling_params.py b/python/sglang/srt/sampling/sampling_params.py
index e5151a1c2..b0863b557 100644
--- a/python/sglang/srt/sampling/sampling_params.py
+++ b/python/sglang/srt/sampling/sampling_params.py
@@ -135,7 +135,7 @@ class SamplingParams:
         self.stop_str_max_len = stop_str_max_len
 
         # Process stop token ids
-        if tokenizer.additional_stop_token_ids:
+        if tokenizer and tokenizer.additional_stop_token_ids:
             self.stop_token_ids.update(tokenizer.additional_stop_token_ids)
 
     def to_srt_kwargs(self):