diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index f4bda8688..36eb3ddc3 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -566,7 +566,7 @@ class TokenizerManager: ) -> None: """Validates that the input token count and the requested token count doesn't exceed the model's context length.""" # FIXME: unify the length validation logic with the one in the scheduler. - _max_req_len = self.context_len - 1 + _max_req_len = self.context_len input_token_num = len(input_ids) if input_ids is not None else 0 if input_token_num >= self.context_len: