From 2187f36237eb532f7a9eab92c198ebd3571e1494 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 25 Jun 2024 01:16:20 -0700 Subject: [PATCH] Add a new arguments log_level_http to control the HTTP logging (#563) --- python/sglang/srt/server.py | 9 +++++--- python/sglang/srt/server_args.py | 38 ++++++++++++++------------------ 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index 9b23c799f..fb19a0348 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -58,6 +58,8 @@ from sglang.srt.utils import ( from sglang.utils import get_exception_traceback +logger = logging.getLogger(__name__) + asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) @@ -192,7 +194,7 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg for i in range(tp_size_local): start_rpyc_service_process(ModelTpService, model_port_args[0].model_tp_ports[i]) if server_args.node_rank != 0: - print("Listen for connections...") + logger.info(f"[node_rank={server_args.node_rank}]: Listen for connections...") while True: pass @@ -275,9 +277,10 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg except Exception as e: if pipe_finish_writer is not None: pipe_finish_writer.send(get_exception_traceback()) - print(f"Initialization failed. warmup error: {e}") + print(f"Initialization failed. 
warmup error: {e}", flush=True) raise e + logger.info("The server is fired up and ready to roll!") if pipe_finish_writer is not None: pipe_finish_writer.send("init ok") @@ -290,7 +293,7 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg app, host=server_args.host, port=server_args.port, - log_level=server_args.log_level, + log_level=server_args.log_level_http or server_args.log_level, timeout_keep_alive=5, loop="uvloop", ) diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 183afb3b8..75d9033d6 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -37,9 +37,8 @@ class ServerArgs: # Logging log_level: str = "info" + log_level_http: Optional[str] = None log_requests: bool = False - disable_log_stats: bool = False - log_stats_interval: int = 10 show_time_cost: bool = False # Other @@ -106,7 +105,7 @@ class ServerArgs: type=int, nargs="*", default=[], - help="Additional ports specified for the server.", + help="The additional ports specified for the server.", ) parser.add_argument( "--load-format", @@ -179,7 +178,7 @@ class ServerArgs: type=str, default=ServerArgs.schedule_heuristic, choices=["lpm", "random", "fcfs", "dfs-weight"], - help="Scheduling Heuristic.", + help="The scheduling heuristic.", ) parser.add_argument( "--schedule-conservativeness", @@ -191,7 +190,7 @@ class ServerArgs: "--tp-size", type=int, default=ServerArgs.tp_size, - help="Tensor parallelism size.", + help="The tensor parallelism size.", ) parser.add_argument( "--stream-interval", @@ -203,29 +202,24 @@ class ServerArgs: "--random-seed", type=int, default=ServerArgs.random_seed, - help="Random seed.", + help="The random seed.", ) parser.add_argument( "--log-level", type=str, default=ServerArgs.log_level, - help="Logging level", + help="The logging level of all loggers.", + ) + parser.add_argument( + "--log-level-http", + type=str, + default=ServerArgs.log_level_http, + help="The logging 
level of HTTP server. If not set, reuse --log-level by default.", ) parser.add_argument( "--log-requests", action="store_true", - help="Log all requests", - ) - parser.add_argument( - "--disable-log-stats", - action="store_true", - help="Disable logging throughput stats.", - ) - parser.add_argument( - "--log-stats-interval", - type=int, - default=ServerArgs.log_stats_interval, - help="Log stats interval in second.", + help="Log the inputs and outputs of all requests.", ) parser.add_argument( "--show-time-cost", @@ -244,13 +238,13 @@ class ServerArgs: "--dp-size", type=int, default=ServerArgs.dp_size, - help="Data parallelism size.", + help="The data parallelism size.", ) parser.add_argument( "--load-balance-method", type=str, default=ServerArgs.load_balance_method, - help="Load balancing strategy for data parallelism.", + help="The load balancing strategy for data parallelism.", choices=[ "round_robin", "shortest_queue", @@ -267,7 +261,7 @@ class ServerArgs: "--nnodes", type=int, default=1, - help="Number of nodes" + help="The number of nodes." ) parser.add_argument( "--node-rank",