Signed-off-by: ybyang <ybyang7@iflytek.com>
Signed-off-by: huanglong <huanglong@linux.alibaba.com>
Co-authored-by: Huang Long <121648372+LLLL114@users.noreply.github.com>
Co-authored-by: huanglong <huanglong@linux.alibaba.com>
Co-authored-by: Shangming Cai <csmthu@gmail.com>
This commit is contained in:
@@ -84,6 +84,8 @@ from sglang.srt.managers.io_struct import (
|
||||
InitWeightsUpdateGroupReqInput,
|
||||
LoadLoRAAdapterReqInput,
|
||||
LoadLoRAAdapterReqOutput,
|
||||
MultiTokenizerRegisterReq,
|
||||
MultiTokenizerWarpper,
|
||||
OpenSessionReqInput,
|
||||
OpenSessionReqOutput,
|
||||
ProfileReq,
|
||||
@@ -257,7 +259,6 @@ class Scheduler(
|
||||
# Init inter-process communication
|
||||
context = zmq.Context(2)
|
||||
self.idle_sleeper = None
|
||||
|
||||
if self.pp_rank == 0 and self.attn_tp_rank == 0:
|
||||
self.recv_from_tokenizer = get_zmq_socket(
|
||||
context, zmq.PULL, port_args.scheduler_input_ipc_name, False
|
||||
@@ -540,6 +541,7 @@ class Scheduler(
|
||||
(ExpertDistributionReq, self.expert_distribution_handle),
|
||||
(LoadLoRAAdapterReqInput, self.load_lora_adapter),
|
||||
(UnloadLoRAAdapterReqInput, self.unload_lora_adapter),
|
||||
(MultiTokenizerRegisterReq, self.register_multi_tokenizer),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -1101,6 +1103,17 @@ class Scheduler(
|
||||
)
|
||||
self.send_to_tokenizer.send_pyobj(abort_req)
|
||||
continue
|
||||
|
||||
# If it is a MultiTokenizerWarpper, unwrap it and handle the inner request.
|
||||
if isinstance(recv_req, MultiTokenizerWarpper):
|
||||
worker_id = recv_req.worker_id
|
||||
recv_req = recv_req.obj
|
||||
output = self._request_dispatcher(recv_req)
|
||||
if output is not None:
|
||||
output = MultiTokenizerWarpper(worker_id, output)
|
||||
self.send_to_tokenizer.send_pyobj(output)
|
||||
continue
|
||||
|
||||
output = self._request_dispatcher(recv_req)
|
||||
if output is not None:
|
||||
if isinstance(output, RpcReqOutput):
|
||||
@@ -2474,6 +2487,10 @@ class Scheduler(
|
||||
result = self.tp_worker.unload_lora_adapter(recv_req)
|
||||
return result
|
||||
|
||||
# Handle a MultiTokenizerRegisterReq: forward the request object as-is through
# `self.send_to_detokenizer` and hand the very same object back to the caller.
# This handler is the one paired with MultiTokenizerRegisterReq in the request
# dispatcher table.
# NOTE(review): `send_to_detokenizer` is presumably a ZMQ socket (send_pyobj
# is the pyzmq pickling send) -- confirm where it is created.
def register_multi_tokenizer(self, recv_req: MultiTokenizerRegisterReq):
|
||||
self.send_to_detokenizer.send_pyobj(recv_req)
|
||||
# The request itself is the return value, so the dispatcher result is not None.
# NOTE(review): presumably the receive loop then relays it onward as a
# reply -- verify against the caller, which is only partly visible here.
return recv_req
|
||||
|
||||
def slow_down(self, recv_req: SlowDownReqInput):
|
||||
t = recv_req.forward_sleep_time
|
||||
if t is not None and t <= 0:
|
||||
|
||||
Reference in New Issue
Block a user