Add a new argument log_level_http to control the HTTP logging (#563)
This commit is contained in:
@@ -58,6 +58,8 @@ from sglang.srt.utils import (
|
|||||||
from sglang.utils import get_exception_traceback
|
from sglang.utils import get_exception_traceback
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
|
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
|
||||||
|
|
||||||
|
|
||||||
@@ -192,7 +194,7 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
|
|||||||
for i in range(tp_size_local):
|
for i in range(tp_size_local):
|
||||||
start_rpyc_service_process(ModelTpService, model_port_args[0].model_tp_ports[i])
|
start_rpyc_service_process(ModelTpService, model_port_args[0].model_tp_ports[i])
|
||||||
if server_args.node_rank != 0:
|
if server_args.node_rank != 0:
|
||||||
print("Listen for connections...")
|
logger.info(f"[node_rank={server_args.node_rank}]: Listen for connections...")
|
||||||
while True:
|
while True:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -275,9 +277,10 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
if pipe_finish_writer is not None:
|
if pipe_finish_writer is not None:
|
||||||
pipe_finish_writer.send(get_exception_traceback())
|
pipe_finish_writer.send(get_exception_traceback())
|
||||||
print(f"Initialization failed. warmup error: {e}")
|
print(f"Initialization failed. warmup error: {e}", flush=True)
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
logger.info("The server is fired up and ready to roll!")
|
||||||
if pipe_finish_writer is not None:
|
if pipe_finish_writer is not None:
|
||||||
pipe_finish_writer.send("init ok")
|
pipe_finish_writer.send("init ok")
|
||||||
|
|
||||||
@@ -290,7 +293,7 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
|
|||||||
app,
|
app,
|
||||||
host=server_args.host,
|
host=server_args.host,
|
||||||
port=server_args.port,
|
port=server_args.port,
|
||||||
log_level=server_args.log_level,
|
log_level=server_args.log_level_http or server_args.log_level,
|
||||||
timeout_keep_alive=5,
|
timeout_keep_alive=5,
|
||||||
loop="uvloop",
|
loop="uvloop",
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -37,9 +37,8 @@ class ServerArgs:
|
|||||||
|
|
||||||
# Logging
|
# Logging
|
||||||
log_level: str = "info"
|
log_level: str = "info"
|
||||||
|
log_level_http: Optional[str] = None
|
||||||
log_requests: bool = False
|
log_requests: bool = False
|
||||||
disable_log_stats: bool = False
|
|
||||||
log_stats_interval: int = 10
|
|
||||||
show_time_cost: bool = False
|
show_time_cost: bool = False
|
||||||
|
|
||||||
# Other
|
# Other
|
||||||
@@ -106,7 +105,7 @@ class ServerArgs:
|
|||||||
type=int,
|
type=int,
|
||||||
nargs="*",
|
nargs="*",
|
||||||
default=[],
|
default=[],
|
||||||
help="Additional ports specified for the server.",
|
help="The additional ports specified for the server.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--load-format",
|
"--load-format",
|
||||||
@@ -179,7 +178,7 @@ class ServerArgs:
|
|||||||
type=str,
|
type=str,
|
||||||
default=ServerArgs.schedule_heuristic,
|
default=ServerArgs.schedule_heuristic,
|
||||||
choices=["lpm", "random", "fcfs", "dfs-weight"],
|
choices=["lpm", "random", "fcfs", "dfs-weight"],
|
||||||
help="Scheduling Heuristic.",
|
help="The scheduling heuristic.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--schedule-conservativeness",
|
"--schedule-conservativeness",
|
||||||
@@ -191,7 +190,7 @@ class ServerArgs:
|
|||||||
"--tp-size",
|
"--tp-size",
|
||||||
type=int,
|
type=int,
|
||||||
default=ServerArgs.tp_size,
|
default=ServerArgs.tp_size,
|
||||||
help="Tensor parallelism size.",
|
help="The tensor parallelism size.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--stream-interval",
|
"--stream-interval",
|
||||||
@@ -203,29 +202,24 @@ class ServerArgs:
|
|||||||
"--random-seed",
|
"--random-seed",
|
||||||
type=int,
|
type=int,
|
||||||
default=ServerArgs.random_seed,
|
default=ServerArgs.random_seed,
|
||||||
help="Random seed.",
|
help="The random seed.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--log-level",
|
"--log-level",
|
||||||
type=str,
|
type=str,
|
||||||
default=ServerArgs.log_level,
|
default=ServerArgs.log_level,
|
||||||
help="Logging level",
|
help="The logging level of all loggers.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--log-level-http",
|
||||||
|
type=str,
|
||||||
|
default=ServerArgs.log_level_http,
|
||||||
|
help="The logging level of HTTP server. If not set, reuse --log-level by default.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--log-requests",
|
"--log-requests",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Log all requests",
|
help="Log the inputs and outputs of all requests.",
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--disable-log-stats",
|
|
||||||
action="store_true",
|
|
||||||
help="Disable logging throughput stats.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--log-stats-interval",
|
|
||||||
type=int,
|
|
||||||
default=ServerArgs.log_stats_interval,
|
|
||||||
help="Log stats interval in second.",
|
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--show-time-cost",
|
"--show-time-cost",
|
||||||
@@ -244,13 +238,13 @@ class ServerArgs:
|
|||||||
"--dp-size",
|
"--dp-size",
|
||||||
type=int,
|
type=int,
|
||||||
default=ServerArgs.dp_size,
|
default=ServerArgs.dp_size,
|
||||||
help="Data parallelism size.",
|
help="The data parallelism size.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--load-balance-method",
|
"--load-balance-method",
|
||||||
type=str,
|
type=str,
|
||||||
default=ServerArgs.load_balance_method,
|
default=ServerArgs.load_balance_method,
|
||||||
help="Load balancing strategy for data parallelism.",
|
help="The load balancing strategy for data parallelism.",
|
||||||
choices=[
|
choices=[
|
||||||
"round_robin",
|
"round_robin",
|
||||||
"shortest_queue",
|
"shortest_queue",
|
||||||
@@ -267,7 +261,7 @@ class ServerArgs:
|
|||||||
"--nnodes",
|
"--nnodes",
|
||||||
type=int,
|
type=int,
|
||||||
default=1,
|
default=1,
|
||||||
help="Number of nodes"
|
help="The number of nodes."
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--node-rank",
|
"--node-rank",
|
||||||
|
|||||||
Reference in New Issue
Block a user