release initial code
Co-authored-by: Ying Sheng <sqy1415@gmail.com> Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com> Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu> Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com> Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com> Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>
This commit is contained in:
138
python/sglang/srt/server_args.py
Normal file
138
python/sglang/srt/server_args.py
Normal file
@@ -0,0 +1,138 @@
|
||||
import argparse
|
||||
import dataclasses
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
@dataclasses.dataclass
class ServerArgs:
    """Server launch configuration, constructible directly or from CLI flags.

    Every field corresponds to a command-line option registered by
    :meth:`add_cli_args` (``--model-path`` -> ``model_path`` and so on), so
    :meth:`from_cli_args` can build an instance straight from a parsed
    ``argparse.Namespace``.
    """

    # Path of the model weights: a local folder or a Hugging Face repo ID.
    model_path: str
    # Path of the tokenizer; falls back to ``model_path`` when left as None.
    tokenizer_path: Optional[str] = None
    host: str = "127.0.0.1"
    port: int = 30000
    # One of "auto", "pt", "safetensors", "npcache", "dummy" (see CLI help).
    load_format: str = "auto"
    # "auto" uses the fast tokenizer if available; "slow" always uses the slow one.
    tokenizer_mode: str = "auto"
    # NOTE(review): the default here is True, but the ``--trust-remote-code``
    # flag is ``store_true`` (argparse default False), so instances built via
    # ``from_cli_args`` default to False. Confirm which default is intended.
    trust_remote_code: bool = True
    # Fraction of memory used for static allocation (model weights and KV
    # cache memory pool).
    mem_fraction_static: float = 0.91
    # Tensor parallelism degree.
    tp_size: int = 1
    # Model mode flags, e.g. flashinfer, no-cache, aggressive-new-fill.
    # A default_factory keeps the default a real list (matching both the
    # annotation and the CLI default) without sharing one mutable object
    # between instances.
    model_mode: List[str] = dataclasses.field(default_factory=list)
    # Schedule mode: lpm, weight, random, or fcfs (per the CLI help).
    schedule_heuristic: str = "lpm"
    random_seed: int = 42
    # When True, logging of throughput stats is disabled.
    disable_log_stats: bool = False
    # Interval between stats logs, in seconds.
    log_stats_interval: int = 10
    log_level: str = "info"

    def __post_init__(self):
        """Default the tokenizer path to the model path when not given."""
        if self.tokenizer_path is None:
            self.tokenizer_path = self.model_path

    @staticmethod
    def add_cli_args(parser: argparse.ArgumentParser):
        """Register one command-line option per ``ServerArgs`` field.

        Args:
            parser: The argument parser to extend in place.
        """
        parser.add_argument(
            "--model-path",
            type=str,
            help="The path of the model weights. This can be a local folder or a Hugging Face repo ID.",
            required=True,
        )
        parser.add_argument(
            "--tokenizer-path",
            type=str,
            default=ServerArgs.tokenizer_path,
            help="The path of the tokenizer.",
        )
        parser.add_argument("--host", type=str, default=ServerArgs.host)
        parser.add_argument("--port", type=int, default=ServerArgs.port)
        parser.add_argument(
            "--load-format",
            type=str,
            default=ServerArgs.load_format,
            choices=["auto", "pt", "safetensors", "npcache", "dummy"],
            help="The format of the model weights to load. "
            '"auto" will try to load the weights in the safetensors format '
            "and fall back to the pytorch bin format if safetensors format "
            "is not available. "
            '"pt" will load the weights in the pytorch bin format. '
            '"safetensors" will load the weights in the safetensors format. '
            '"npcache" will load the weights in pytorch format and store '
            "a numpy cache to speed up the loading. "
            '"dummy" will initialize the weights with random values, '
            "which is mainly for profiling.",
        )
        parser.add_argument(
            "--tokenizer-mode",
            type=str,
            default=ServerArgs.tokenizer_mode,
            choices=["auto", "slow"],
            help="Tokenizer mode. 'auto' will use the fast "
            "tokenizer if available, and 'slow' will "
            "always use the slow tokenizer.",
        )
        parser.add_argument(
            "--trust-remote-code",
            action="store_true",
            help="Whether or not to allow for custom models defined on the Hub in their own modeling files.",
        )
        parser.add_argument(
            "--mem-fraction-static",
            type=float,
            default=ServerArgs.mem_fraction_static,
            help="The fraction of the memory used for static allocation (model weights and KV cache memory pool)",
        )
        parser.add_argument(
            "--tp-size",
            type=int,
            default=ServerArgs.tp_size,
            help="Tensor parallelism degree.",
        )
        parser.add_argument(
            "--model-mode",
            type=str,
            default=[],
            nargs="+",
            help="Model mode: [flashinfer, no-cache, aggressive-new-fill]",
        )
        parser.add_argument(
            "--schedule-heuristic",
            type=str,
            default=ServerArgs.schedule_heuristic,
            help="Schedule mode: [lpm, weight, random, fcfs]",
        )
        parser.add_argument(
            "--random-seed",
            type=int,
            default=ServerArgs.random_seed,
            help="Random seed.",
        )
        parser.add_argument(
            "--log-level",
            type=str,
            default=ServerArgs.log_level,
            help="Log level",
        )
        parser.add_argument(
            "--disable-log-stats",
            action="store_true",
            help="Disable logging throughput stats.",
        )
        parser.add_argument(
            "--log-stats-interval",
            type=int,
            default=ServerArgs.log_stats_interval,
            help="Log stats interval in second.",
        )

    @classmethod
    def from_cli_args(cls, args: argparse.Namespace):
        """Build an instance from a namespace produced by ``add_cli_args``.

        Args:
            args: Parsed arguments; must carry an attribute for every
                dataclass field (``AttributeError`` is raised otherwise).

        Returns:
            A new instance populated from ``args``.
        """
        attrs = [attr.name for attr in dataclasses.fields(cls)]
        return cls(**{attr: getattr(args, attr) for attr in attrs})

    def url(self):
        """Return the server's base HTTP URL built from ``host`` and ``port``."""
        return f"http://{self.host}:{self.port}"
|
||||
|
||||
|
||||
@dataclasses.dataclass
class PortArgs:
    """Bundle of port numbers passed around as a single value.

    All fields are required and positional in the order declared below.
    NOTE(review): judging by the names, these are presumably the ports used
    by the tokenizer, router, detokenizer, NCCL, and the model RPC workers;
    their actual consumers are not visible in this file - confirm at call
    sites.
    """

    tokenizer_port: int
    router_port: int
    detokenizer_port: int
    nccl_port: int
    # One entry per model RPC endpoint (presumably one per worker - verify).
    model_rpc_ports: List[int]
|
||||
Reference in New Issue
Block a user