Support Multi Process Tokenizer Manager (#6555)

Signed-off-by: ybyang <ybyang7@iflytek.com>
Signed-off-by: huanglong <huanglong@linux.alibaba.com>
Co-authored-by: lw9527 <952799980@qq.com>
Co-authored-by: huanglong <huanglong@linux.alibaba.com>
Co-authored-by: Huang Long <121648372+LLLL114@users.noreply.github.com>
This commit is contained in:
ybyang
2025-08-08 16:45:50 +08:00
committed by GitHub
parent 6ee6619b7a
commit 7490e3f67d
9 changed files with 1133 additions and 73 deletions

View File

@@ -51,6 +51,7 @@ class ServerArgs:
model_path: str
tokenizer_path: Optional[str] = None
tokenizer_mode: str = "auto"
tokenizer_worker_num: int = 1
skip_tokenizer_init: bool = False
load_format: str = "auto"
model_loader_extra_config: str = "{}"
@@ -730,6 +731,12 @@ class ServerArgs:
default=ServerArgs.tokenizer_path,
help="The path of the tokenizer.",
)
parser.add_argument(
"--tokenizer-worker-num",
type=int,
default=ServerArgs.tokenizer_worker_num,
help="The worker num of the tokenizer manager.",
)
parser.add_argument(
"--tokenizer-mode",
type=str,
@@ -2081,6 +2088,9 @@ class ServerArgs:
self.chunked_prefill_size % self.page_size == 0
), "chunked_prefill_size must be divisible by page_size"
# Check multi tokenizer
assert self.tokenizer_worker_num > 0, "Tokenizer worker num must >= 1"
def check_lora_server_args(self):
assert (
self.max_loras_per_batch > 0
@@ -2246,6 +2256,9 @@ class PortArgs:
# The ipc filename for Scheduler to send metrics
metrics_ipc_name: str
# The ipc filename for Tokenizer and worker tokenizer
tokenizer_worker_ipc_name: Optional[str]
@staticmethod
def init_new(server_args, dp_rank: Optional[int] = None) -> "PortArgs":
if server_args.nccl_port is None:
@@ -2269,6 +2282,7 @@ class PortArgs:
nccl_port=nccl_port,
rpc_ipc_name=f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}",
metrics_ipc_name=f"ipc://{tempfile.NamedTemporaryFile(delete=False).name}",
tokenizer_worker_ipc_name=None,
)
else:
# DP attention. Use TCP + port to handle both single-node and multi-node.
@@ -2302,6 +2316,7 @@ class PortArgs:
nccl_port=nccl_port,
rpc_ipc_name=f"tcp://{dist_init_host}:{rpc_port}",
metrics_ipc_name=f"tcp://{dist_init_host}:{metrics_ipc_name}",
tokenizer_worker_ipc_name=None,
)