Support Multi Process Tokenizer Manager (#6555)

Signed-off-by: ybyang <ybyang7@iflytek.com>
Signed-off-by: huanglong <huanglong@linux.alibaba.com>
Co-authored-by: lw9527 <952799980@qq.com>
Co-authored-by: huanglong <huanglong@linux.alibaba.com>
Co-authored-by: Huang Long <121648372+LLLL114@users.noreply.github.com>
This commit is contained in:
ybyang
2025-08-08 16:45:50 +08:00
committed by GitHub
parent 6ee6619b7a
commit 7490e3f67d
9 changed files with 1133 additions and 73 deletions

View File

@@ -79,6 +79,7 @@ from sglang.srt.managers.io_struct import (
InitWeightsUpdateGroupReqInput,
LoadLoRAAdapterReqInput,
LoadLoRAAdapterReqOutput,
MultiTokenizerRegisterReq,
OpenSessionReqInput,
OpenSessionReqOutput,
ProfileReq,
@@ -247,7 +248,6 @@ class Scheduler(
# Init inter-process communication
context = zmq.Context(2)
self.idle_sleeper = None
if self.pp_rank == 0 and self.attn_tp_rank == 0:
self.recv_from_tokenizer = get_zmq_socket(
context, zmq.PULL, port_args.scheduler_input_ipc_name, False
@@ -522,6 +522,7 @@ class Scheduler(
(ExpertDistributionReq, self.expert_distribution_handle),
(LoadLoRAAdapterReqInput, self.load_lora_adapter),
(UnloadLoRAAdapterReqInput, self.unload_lora_adapter),
(MultiTokenizerRegisterReq, self.register_multi_tokenizer),
]
)
@@ -1063,6 +1064,8 @@ class Scheduler(
if self.recv_from_rpc is not None:
self.recv_from_rpc.send_pyobj(output)
else:
if recv_req.rids is not None:
output.rids = recv_req.rids
self.send_to_tokenizer.send_pyobj(output)
def handle_generate_request(
@@ -2400,6 +2403,10 @@ class Scheduler(
result = self.tp_worker.unload_lora_adapter(recv_req)
return result
def register_multi_tokenizer(self, recv_req: MultiTokenizerRegisterReq):
    """Relay a multi-tokenizer registration request to the detokenizer.

    The request object is passed along unchanged through
    ``self.send_to_detokenizer`` and then handed back to the caller.

    Args:
        recv_req: The registration request received by the scheduler.

    Returns:
        The same ``recv_req`` object that was passed in.
    """
    # NOTE(review): send_to_detokenizer is presumably a ZMQ socket set up
    # during scheduler initialization -- confirm against __init__.
    detokenizer_channel = self.send_to_detokenizer
    detokenizer_channel.send_pyobj(recv_req)
    return recv_req
def slow_down(self, recv_req: SlowDownReqInput):
t = recv_req.forward_sleep_time
if t is not None and t <= 0: