diff --git a/python/sglang/srt/entrypoints/http_server.py b/python/sglang/srt/entrypoints/http_server.py index b82b40d1d..a2676038d 100644 --- a/python/sglang/srt/entrypoints/http_server.py +++ b/python/sglang/srt/entrypoints/http_server.py @@ -932,11 +932,9 @@ def launch_server( warmup_thread.join() -def _wait_and_warmup( +def _execute_server_warmup( server_args: ServerArgs, pipe_finish_writer: Optional[multiprocessing.connection.Connection], - image_token_text: str, - launch_callback: Optional[Callable[[], None]] = None, ): headers = {} url = server_args.url() @@ -961,7 +959,7 @@ def _wait_and_warmup( pipe_finish_writer.send(last_traceback) logger.error(f"Initialization failed. warmup error: {last_traceback}") kill_process_tree(os.getpid()) - return + return success model_info = res.json() @@ -1035,10 +1033,25 @@ def _wait_and_warmup( pipe_finish_writer.send(last_traceback) logger.error(f"Initialization failed. warmup error: {last_traceback}") kill_process_tree(os.getpid()) - return + return False # Debug print # logger.info(f"warmup request returns: {res.json()=}") + return success + + +def _wait_and_warmup( + server_args: ServerArgs, + pipe_finish_writer: Optional[multiprocessing.connection.Connection], + image_token_text: str, + launch_callback: Optional[Callable[[], None]] = None, +): + if not server_args.skip_server_warmup: + if not _execute_server_warmup( + server_args, + pipe_finish_writer, + ): + return logger.info("The server is fired up and ready to roll!") diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 73a5845a0..0fb3c6af9 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -46,6 +46,7 @@ class ServerArgs: tokenizer_path: Optional[str] = None tokenizer_mode: str = "auto" skip_tokenizer_init: bool = False + skip_server_warmup: bool = False load_format: str = "auto" model_loader_extra_config: str = "{}" trust_remote_code: bool = False @@ -597,6 +598,11 @@ class ServerArgs: action="store_true", help="If set, skip init tokenizer and pass input_ids in generate request.", ) + parser.add_argument( + "--skip-server-warmup", + action="store_true", + help="If set, skip warmup.", + ) parser.add_argument( "--load-format", type=str,