From b803b395b79cc04ec431936d532719ced61796bb Mon Sep 17 00:00:00 2001 From: Cody Yu Date: Wed, 15 Jan 2025 03:29:33 -0800 Subject: [PATCH] Disable graceful shutdown of tokenizer manager when not in the main thread (#2872) --- .../sglang/srt/managers/tokenizer_manager.py | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index acd3b674a..eae3d87d7 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -21,6 +21,7 @@ import os import pickle import signal import sys +import threading import time import uuid from datetime import datetime @@ -265,10 +266,16 @@ class TokenizerManager: ) input_embeds = obj.input_embeds input_ids = obj.input_ids - elif obj.input_ids is None: - input_ids = self.tokenizer.encode(input_text) - else: + elif obj.input_ids is not None: input_ids = obj.input_ids + else: + if self.tokenizer is None: + raise ValueError( + "The engine initialized with skip_tokenizer_init=True cannot " + "accept text prompts. Please provide input_ids or re-initialize " + "the engine with skip_tokenizer_init=False." + ) + input_ids = self.tokenizer.encode(input_text) if self.is_generation: # TODO: also support getting embeddings for multimodal models @@ -635,8 +642,17 @@ class TokenizerManager: loop = asyncio.get_event_loop() self.asyncio_tasks.add(loop.create_task(self.handle_loop())) - signal_handler = SignalHandler(self) - loop.add_signal_handler(signal.SIGTERM, signal_handler.signal_handler) + # We cannot add signal handler when the tokenizer manager is not in + # the main thread due to the CPython limitation. + if threading.current_thread() is threading.main_thread(): + signal_handler = SignalHandler(self) + loop.add_signal_handler(signal.SIGTERM, signal_handler.signal_handler) + else: + logger.warning( + "Signal handler is not added because the tokenizer manager is " + "not in the main thread. This disables graceful shutdown of the " + "tokenizer manager when SIGTERM is received." + ) self.asyncio_tasks.add(loop.create_task(self.sigterm_watchdog())) async def sigterm_watchdog(self):