Revert "Support Multi Process Tokenizer Manager" (#8960)

This commit is contained in:
Lianmin Zheng
2025-08-08 02:28:27 -07:00
committed by GitHub
parent 41357e511b
commit a947154286
9 changed files with 73 additions and 1133 deletions

View File

@@ -79,7 +79,6 @@ from sglang.srt.managers.io_struct import (
InitWeightsUpdateGroupReqInput,
LoadLoRAAdapterReqInput,
LoadLoRAAdapterReqOutput,
MultiTokenizerRegisterReq,
OpenSessionReqInput,
OpenSessionReqOutput,
ProfileReq,
@@ -248,6 +247,7 @@ class Scheduler(
# Init inter-process communication
context = zmq.Context(2)
self.idle_sleeper = None
if self.pp_rank == 0 and self.attn_tp_rank == 0:
self.recv_from_tokenizer = get_zmq_socket(
context, zmq.PULL, port_args.scheduler_input_ipc_name, False
@@ -522,7 +522,6 @@ class Scheduler(
(ExpertDistributionReq, self.expert_distribution_handle),
(LoadLoRAAdapterReqInput, self.load_lora_adapter),
(UnloadLoRAAdapterReqInput, self.unload_lora_adapter),
(MultiTokenizerRegisterReq, self.register_multi_tokenizer),
]
)
@@ -1065,8 +1064,6 @@ class Scheduler(
if self.recv_from_rpc is not None:
self.recv_from_rpc.send_pyobj(output)
else:
if recv_req.rids is not None:
output.rids = recv_req.rids
self.send_to_tokenizer.send_pyobj(output)
def handle_generate_request(
@@ -2407,10 +2404,6 @@ class Scheduler(
result = self.tp_worker.unload_lora_adapter(recv_req)
return result
# Handler for MultiTokenizerRegisterReq: forwards the request object unchanged
# through self.send_to_detokenizer.send_pyobj(), then returns that same object
# to the caller.
# NOTE(review): this listing is a revert diff whose +/- markers and indentation
# were stripped; judging by the hunk header (10 lines before, 6 after), this
# method is presumably among the removed lines -- confirm against the raw diff.
def register_multi_tokenizer(self, recv_req: MultiTokenizerRegisterReq):
self.send_to_detokenizer.send_pyobj(recv_req)
return recv_req
def slow_down(self, recv_req: SlowDownReqInput):
t = recv_req.forward_sleep_time
if t is not None and t <= 0: