Fix the default arguments of bench_offline_throughput.py & simplify detokenizer manager (#2042)

This commit is contained in:
Lianmin Zheng
2024-11-15 05:02:44 -08:00
committed by GitHub
parent 29ebe3dff4
commit 2558d6a675
5 changed files with 42 additions and 38 deletions

View File

@@ -100,20 +100,6 @@ class DetokenizerManager:
if isinstance(recv_obj, BatchEmbeddingOut):
# If it is embedding model, no detokenization is needed.
self.send_to_tokenizer.send_pyobj(
BatchEmbeddingOut(
rids=recv_obj.rids,
embeddings=recv_obj.embeddings,
meta_info=recv_obj.meta_info,
finished_reason=recv_obj.finished_reason,
)
)
continue
elif isinstance(recv_obj, UpdateWeightReqOutput):
# If it is a weight update request, no detokenization is needed.
self.send_to_tokenizer.send_pyobj(recv_obj)
continue
elif isinstance(recv_obj, GetMemPoolSizeReqOutput):
self.send_to_tokenizer.send_pyobj(recv_obj)
continue
else:

View File

@@ -114,6 +114,9 @@ class Scheduler:
self.recv_from_tokenizer = get_zmq_socket(
context, zmq.PULL, port_args.scheduler_input_ipc_name
)
self.send_to_tokenizer = get_zmq_socket(
context, zmq.PUSH, port_args.tokenizer_ipc_name
)
if server_args.skip_tokenizer_init:
# Directly send to the tokenizer/api
@@ -127,6 +130,7 @@ class Scheduler:
)
else:
self.recv_from_tokenizer = None
self.send_to_tokenizer = SimpleNamespace(send_pyobj=lambda x: None)
self.send_to_detokenizer = SimpleNamespace(send_pyobj=lambda x: None)
# Init tokenizer
@@ -421,7 +425,7 @@ class Scheduler:
self.abort_request(recv_req)
elif isinstance(recv_req, UpdateWeightReqInput):
success, message = self.update_weights(recv_req)
self.send_to_detokenizer.send_pyobj(
self.send_to_tokenizer.send_pyobj(
UpdateWeightReqOutput(success, message)
)
elif isinstance(recv_req, ProfileReq):
@@ -430,7 +434,7 @@ class Scheduler:
else:
self.stop_profile()
elif isinstance(recv_req, GetMemPoolSizeReq):
self.send_to_detokenizer.send_pyobj(
self.send_to_tokenizer.send_pyobj(
GetMemPoolSizeReqOutput(self.max_total_num_tokens)
)
else: