Support Multi Process Tokenizer Manager(#6555) (#8964)

Signed-off-by: ybyang <ybyang7@iflytek.com>
Signed-off-by: huanglong <huanglong@linux.alibaba.com>
Co-authored-by: Huang Long <121648372+LLLL114@users.noreply.github.com>
Co-authored-by: huanglong <huanglong@linux.alibaba.com>
Co-authored-by: Shangming Cai <csmthu@gmail.com>
This commit is contained in:
ybyang
2025-09-01 16:00:13 +08:00
committed by GitHub
parent 4750cddf68
commit 5f77e1292d
11 changed files with 1030 additions and 80 deletions

View File

@@ -60,6 +60,7 @@ from sglang.srt.managers.io_struct import (
UpdateWeightsFromDistributedReqInput,
UpdateWeightsFromTensorReqInput,
)
from sglang.srt.managers.multi_tokenizer_mixin import MultiTokenizerRouter
from sglang.srt.managers.scheduler import run_scheduler_process
from sglang.srt.managers.template_manager import TemplateManager
from sglang.srt.managers.tokenizer_manager import TokenizerManager
@@ -814,18 +815,24 @@ def _launch_subprocesses(
),
)
detoken_proc.start()
if server_args.tokenizer_worker_num > 1:
# Launch multi-tokenizer router
tokenizer_manager = MultiTokenizerRouter(server_args, port_args)
# Launch tokenizer process
tokenizer_manager = TokenizerManager(server_args, port_args)
# Initialize templates
template_manager = None
else:
# Launch tokenizer process
tokenizer_manager = TokenizerManager(server_args, port_args)
# Initialize templates
template_manager = TemplateManager()
template_manager.initialize_templates(
tokenizer_manager=tokenizer_manager,
model_path=server_args.model_path,
chat_template=server_args.chat_template,
completion_template=server_args.completion_template,
)
# Initialize templates
template_manager = TemplateManager()
template_manager.initialize_templates(
tokenizer_manager=tokenizer_manager,
model_path=server_args.model_path,
chat_template=server_args.chat_template,
completion_template=server_args.completion_template,
)
# Wait for the model to finish loading
scheduler_infos = []