From 64f296f8e6a67f4c58cb30730fcf7ee2a54b5b5b Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Thu, 11 Sep 2025 07:06:29 -0700 Subject: [PATCH] [Minor] Improve the style of server args (#10328) --- python/sglang/srt/server_args.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index fefdd547b..17371a66b 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -869,12 +869,6 @@ class ServerArgs: default=ServerArgs.tokenizer_path, help="The path of the tokenizer.", ) - parser.add_argument( - "--tokenizer-worker-num", - type=int, - default=ServerArgs.tokenizer_worker_num, - help="The worker num of the tokenizer manager.", - ) parser.add_argument( "--tokenizer-mode", type=str, @@ -884,6 +878,12 @@ class ServerArgs: "tokenizer if available, and 'slow' will " "always use the slow tokenizer.", ) + parser.add_argument( + "--tokenizer-worker-num", + type=int, + default=ServerArgs.tokenizer_worker_num, + help="The worker num of the tokenizer manager.", + ) parser.add_argument( "--skip-tokenizer-init", action="store_true", @@ -1721,20 +1721,22 @@ class ServerArgs: default=ServerArgs.moe_dense_tp_size, help="TP size for MoE dense MLP layers. This flag is useful when, with large TP size, there are errors caused by weights in MLP layers having dimension smaller than the min dimension GEMM supports.", ) + # Mamba Cache parser.add_argument( "--max-mamba-cache-size", type=int, default=ServerArgs.max_mamba_cache_size, - help="It is used for mamba cache memory static allocation.", + help="The maximum size of the mamba cache.", ) parser.add_argument( "--mamba-ssm-dtype", type=str, default=ServerArgs.mamba_ssm_dtype, choices=["float32", "bfloat16"], - help="It is used to tune mamba ssm dtype", + help="The data type of the SSM states in mamba cache.", ) + # Hierarchical cache parser.add_argument( "--enable-hierarchical-cache",