Fix the default arguments of bench_offline_throughput.py & simplify detokenizer manager (#2042)

This commit is contained in:
Lianmin Zheng
2024-11-15 05:02:44 -08:00
committed by GitHub
parent 29ebe3dff4
commit 2558d6a675
5 changed files with 42 additions and 38 deletions

View File

@@ -114,6 +114,9 @@ class Scheduler:
self.recv_from_tokenizer = get_zmq_socket(
context, zmq.PULL, port_args.scheduler_input_ipc_name
)
+self.send_to_tokenizer = get_zmq_socket(
+context, zmq.PUSH, port_args.tokenizer_ipc_name
+)
if server_args.skip_tokenizer_init:
# Directly send to the tokenizer/api
@@ -127,6 +130,7 @@ class Scheduler:
)
else:
self.recv_from_tokenizer = None
+self.send_to_tokenizer = SimpleNamespace(send_pyobj=lambda x: None)
self.send_to_detokenizer = SimpleNamespace(send_pyobj=lambda x: None)
# Init tokenizer
@@ -421,7 +425,7 @@ class Scheduler:
self.abort_request(recv_req)
elif isinstance(recv_req, UpdateWeightReqInput):
success, message = self.update_weights(recv_req)
-self.send_to_detokenizer.send_pyobj(
+self.send_to_tokenizer.send_pyobj(
UpdateWeightReqOutput(success, message)
)
elif isinstance(recv_req, ProfileReq):
@@ -430,7 +434,7 @@ class Scheduler:
else:
self.stop_profile()
elif isinstance(recv_req, GetMemPoolSizeReq):
-self.send_to_detokenizer.send_pyobj(
+self.send_to_tokenizer.send_pyobj(
GetMemPoolSizeReqOutput(self.max_total_num_tokens)
)
else: