From ce216c80dc413e404b7f6f63a151436253c8b837 Mon Sep 17 00:00:00 2001 From: Qubitium <417764+Qubitium@users.noreply.github.com> Date: Sun, 24 Mar 2024 01:15:16 +0800 Subject: [PATCH] Cleanup codebase: removed unnecessary code/logic (#298) --- python/sglang/srt/managers/router/manager.py | 8 ++++++-- python/sglang/srt/managers/router/model_rpc.py | 1 - python/sglang/srt/managers/tokenizer_manager.py | 7 ++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/python/sglang/srt/managers/router/manager.py b/python/sglang/srt/managers/router/manager.py index 4dc7d1f1c..7562a3022 100644 --- a/python/sglang/srt/managers/router/manager.py +++ b/python/sglang/srt/managers/router/manager.py @@ -41,12 +41,16 @@ class RouterManager: self.send_to_detokenizer.send_pyobj(obj) # async sleep for receiving the subsequent request and avoiding cache miss + slept = False if len(out_pyobjs) != 0: has_finished = any([obj.finished for obj in out_pyobjs]) if has_finished: - await asyncio.sleep(self.extend_dependency_time) + if self.extend_dependency_time > 0: + slept = True + await asyncio.sleep(self.extend_dependency_time) - await asyncio.sleep(0.0006) + if not slept: + await asyncio.sleep(0.0006) async def loop_for_recv_requests(self): while True: diff --git a/python/sglang/srt/managers/router/model_rpc.py b/python/sglang/srt/managers/router/model_rpc.py index 68f8423ed..6468c2d5f 100644 --- a/python/sglang/srt/managers/router/model_rpc.py +++ b/python/sglang/srt/managers/router/model_rpc.py @@ -90,7 +90,6 @@ class ModelRpcServer(rpyc.Service): tokenizer_mode=server_args.tokenizer_mode, trust_remote_code=server_args.trust_remote_code, ) - self.eos_token_id = self.tokenizer.eos_token_id self.max_total_num_token = self.model_runner.max_total_num_token self.max_num_running_seq = self.max_total_num_token // 2 self.max_prefill_num_token = max( diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 814141e1f..7947ca2ff 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -36,7 +36,6 @@ class ReqState: out_list: List finished: bool event: asyncio.Event - lock: asyncio.Lock global global_processor @@ -178,9 +177,8 @@ class TokenizerManager: ) self.send_to_router.send_pyobj(tokenized_obj) - lock = asyncio.Lock() event = asyncio.Event() - state = ReqState([], False, event, lock) + state = ReqState([], False, event) self.rid_to_state[rid] = state while True: @@ -221,9 +219,8 @@ class TokenizerManager: ) self.send_to_router.send_pyobj(tokenized_obj) - lock = asyncio.Lock() event = asyncio.Event() - state = ReqState([], False, event, lock) + state = ReqState([], False, event) self.rid_to_state[rid] = state output_list = []