Signed-off-by: ybyang <ybyang7@iflytek.com> Signed-off-by: huanglong <huanglong@linux.alibaba.com> Co-authored-by: Huang Long <121648372+LLLL114@users.noreply.github.com> Co-authored-by: huanglong <huanglong@linux.alibaba.com> Co-authored-by: Shangming Cai <csmthu@gmail.com>
This commit is contained in:
@@ -128,6 +128,7 @@ class ServerArgs:
|
||||
model_path: str
|
||||
tokenizer_path: Optional[str] = None
|
||||
tokenizer_mode: str = "auto"
|
||||
tokenizer_worker_num: int = 1
|
||||
skip_tokenizer_init: bool = False
|
||||
load_format: str = "auto"
|
||||
model_loader_extra_config: str = "{}"
|
||||
@@ -827,6 +828,12 @@ class ServerArgs:
|
||||
default=ServerArgs.tokenizer_path,
|
||||
help="The path of the tokenizer.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tokenizer-worker-num",
|
||||
type=int,
|
||||
default=ServerArgs.tokenizer_worker_num,
|
||||
help="The worker num of the tokenizer manager.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tokenizer-mode",
|
||||
type=str,
|
||||
@@ -2176,6 +2183,9 @@ class ServerArgs:
|
||||
self.chunked_prefill_size % self.page_size == 0
|
||||
), "chunked_prefill_size must be divisible by page_size"
|
||||
|
||||
# Check multi tokenizer
|
||||
assert self.tokenizer_worker_num > 0, "Tokenizer worker num must >= 1"
|
||||
|
||||
def check_lora_server_args(self):
|
||||
assert self.max_loras_per_batch > 0, "max_loras_per_batch must be positive"
|
||||
|
||||
@@ -2419,6 +2429,9 @@ class PortArgs:
|
||||
# The ipc filename for Scheduler to send metrics
|
||||
metrics_ipc_name: str
|
||||
|
||||
# The ipc filename for Tokenizer and worker tokenizer
|
||||
tokenizer_worker_ipc_name: Optional[str]
|
||||
|
||||
@staticmethod
|
||||
def init_new(server_args, dp_rank: Optional[int] = None) -> "PortArgs":
|
||||
if server_args.nccl_port is None:
|
||||
@@ -2442,6 +2455,7 @@ class PortArgs:
|
||||
nccl_port=nccl_port,
|
||||
rpc_ipc_name=f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}",
|
||||
metrics_ipc_name=f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}",
|
||||
tokenizer_worker_ipc_name=None,
|
||||
)
|
||||
else:
|
||||
# DP attention. Use TCP + port to handle both single-node and multi-node.
|
||||
@@ -2475,6 +2489,7 @@ class PortArgs:
|
||||
nccl_port=nccl_port,
|
||||
rpc_ipc_name=f"tcp://{dist_init_host}:{rpc_port}",
|
||||
metrics_ipc_name=f"tcp://{dist_init_host}:{metrics_ipc_name}",
|
||||
tokenizer_worker_ipc_name=None,
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user