From 2e4a1e2d0570f66662cfb98fc2006b0aba2717fa Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 16 Mar 2025 18:10:09 -0700 Subject: [PATCH] Initialize image processor for skip-tokenizer-init codepath (#4479) Co-authored-by: Alex Kirillov --- .../sglang/srt/managers/tokenizer_manager.py | 39 +++++++++++-------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 3132060ed..c211d76ff 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -168,27 +168,32 @@ class TokenizerManager: self.context_len = self.model_config.context_len self.image_token_id = self.model_config.image_token_id - # Create image processor placeholder - self.image_processor = get_dummy_image_processor() + if self.model_config.is_multimodal: + _processor = get_processor( + server_args.tokenizer_path, + tokenizer_mode=server_args.tokenizer_mode, + trust_remote_code=server_args.trust_remote_code, + revision=server_args.revision, + ) - # Create tokenizer - if server_args.skip_tokenizer_init: - self.tokenizer = self.processor = None - else: - if self.model_config.is_multimodal: - self.processor = get_processor( - server_args.tokenizer_path, - tokenizer_mode=server_args.tokenizer_mode, - trust_remote_code=server_args.trust_remote_code, - revision=server_args.revision, - ) + # We want to parallelize the image pre-processing so we create an executor for it + # We create image_processor regardless of skip_tokenizer_init to make sure we still encode + # images even with skip_tokenizer_init=True. 
+ self.image_processor = get_image_processor( + self.model_config.hf_config, server_args, _processor + ) + + if server_args.skip_tokenizer_init: + self.tokenizer = self.processor = None + else: + self.processor = _processor self.tokenizer = self.processor.tokenizer os.environ["TOKENIZERS_PARALLELISM"] = "false" + else: + self.image_processor = get_dummy_image_processor() - # We want to parallelize the image pre-processing so we create an executor for it - self.image_processor = get_image_processor( - self.model_config.hf_config, server_args, self.processor - ) + if server_args.skip_tokenizer_init: + self.tokenizer = self.processor = None else: self.tokenizer = get_tokenizer( server_args.tokenizer_path,