Support Multi Process Tokenizer Manager (#6555)
Signed-off-by: ybyang <ybyang7@iflytek.com>
Signed-off-by: huanglong <huanglong@linux.alibaba.com>
Co-authored-by: lw9527 <952799980@qq.com>
Co-authored-by: huanglong <huanglong@linux.alibaba.com>
Co-authored-by: Huang Long <121648372+LLLL114@users.noreply.github.com>
This commit is contained in:
@@ -51,6 +51,7 @@ class ServerArgs:
|
||||
model_path: str
|
||||
tokenizer_path: Optional[str] = None
|
||||
tokenizer_mode: str = "auto"
|
||||
tokenizer_worker_num: int = 1
|
||||
skip_tokenizer_init: bool = False
|
||||
load_format: str = "auto"
|
||||
model_loader_extra_config: str = "{}"
|
||||
@@ -730,6 +731,12 @@ class ServerArgs:
|
||||
default=ServerArgs.tokenizer_path,
|
||||
help="The path of the tokenizer.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tokenizer-worker-num",
|
||||
type=int,
|
||||
default=ServerArgs.tokenizer_worker_num,
|
||||
help="The worker num of the tokenizer manager.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tokenizer-mode",
|
||||
type=str,
|
||||
@@ -2081,6 +2088,9 @@ class ServerArgs:
|
||||
self.chunked_prefill_size % self.page_size == 0
|
||||
), "chunked_prefill_size must be divisible by page_size"
|
||||
|
||||
# Check that the tokenizer worker count is positive
|
||||
assert self.tokenizer_worker_num > 0, "Tokenizer worker num must >= 1"
|
||||
|
||||
def check_lora_server_args(self):
|
||||
assert (
|
||||
self.max_loras_per_batch > 0
|
||||
@@ -2246,6 +2256,9 @@ class PortArgs:
|
||||
# The ipc filename for Scheduler to send metrics
|
||||
metrics_ipc_name: str
|
||||
|
||||
# The ipc filename for the tokenizer and its worker tokenizers
|
||||
tokenizer_worker_ipc_name: Optional[str]
|
||||
|
||||
@staticmethod
|
||||
def init_new(server_args, dp_rank: Optional[int] = None) -> "PortArgs":
|
||||
if server_args.nccl_port is None:
|
||||
@@ -2269,6 +2282,7 @@ class PortArgs:
|
||||
nccl_port=nccl_port,
|
||||
rpc_ipc_name=f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}",
|
||||
metrics_ipc_name=f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}",
|
||||
tokenizer_worker_ipc_name=None,
|
||||
)
|
||||
else:
|
||||
# DP attention. Use TCP + port to handle both single-node and multi-node.
|
||||
@@ -2302,6 +2316,7 @@ class PortArgs:
|
||||
nccl_port=nccl_port,
|
||||
rpc_ipc_name=f"tcp://{dist_init_host}:{rpc_port}",
|
||||
metrics_ipc_name=f"tcp://{dist_init_host}:{metrics_ipc_name}",
|
||||
tokenizer_worker_ipc_name=None,
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user