diff --git a/python/sglang/srt/entrypoints/http_server.py b/python/sglang/srt/entrypoints/http_server.py index 110292114..9adac76ce 100644 --- a/python/sglang/srt/entrypoints/http_server.py +++ b/python/sglang/srt/entrypoints/http_server.py @@ -93,6 +93,7 @@ from sglang.srt.managers.io_struct import ( from sglang.srt.managers.multi_tokenizer_mixin import ( MultiTokenizerManager, get_main_process_id, + monkey_patch_uvicorn_multiprocessing, read_from_shared_memory, write_data_for_multi_tokenizer, ) @@ -1219,6 +1220,9 @@ def launch_server( "level": "INFO", "propagate": False, } + + monkey_patch_uvicorn_multiprocessing() + uvicorn.run( "sglang.srt.entrypoints.http_server:app", host=server_args.host, diff --git a/python/sglang/srt/managers/multi_tokenizer_mixin.py b/python/sglang/srt/managers/multi_tokenizer_mixin.py index e4f83c82b..8274003ad 100644 --- a/python/sglang/srt/managers/multi_tokenizer_mixin.py +++ b/python/sglang/srt/managers/multi_tokenizer_mixin.py @@ -19,6 +19,7 @@ import os import pickle import sys import threading +from functools import partialmethod from multiprocessing import shared_memory from typing import Any, Dict @@ -556,3 +557,17 @@ def write_data_for_multi_tokenizer( args_shm.close() return args_shm + + +def monkey_patch_uvicorn_multiprocessing(timeout: float = 10): + """Monkey patch the default timeout of uvicorn's multiprocess Process.is_alive.""" + # uvicorn's is_alive default is 5s; this helper defaults to 10s + try: + from uvicorn.supervisors.multiprocess import Process + + Process.is_alive = partialmethod(Process.is_alive, timeout=timeout) + + except ImportError: + logger.warning( + "uvicorn.supervisors.multiprocess not found, skipping monkey patch" + )