diff --git a/python/sglang/srt/managers/router/manager.py b/python/sglang/srt/managers/router/manager.py index 4dc7d1f1c..7562a3022 100644 --- a/python/sglang/srt/managers/router/manager.py +++ b/python/sglang/srt/managers/router/manager.py @@ -41,12 +41,16 @@ class RouterManager: self.send_to_detokenizer.send_pyobj(obj) # async sleep for receiving the subsequent request and avoiding cache miss + slept = False if len(out_pyobjs) != 0: has_finished = any([obj.finished for obj in out_pyobjs]) if has_finished: - await asyncio.sleep(self.extend_dependency_time) + if self.extend_dependency_time > 0: + slept = True + await asyncio.sleep(self.extend_dependency_time) - await asyncio.sleep(0.0006) + if not slept: + await asyncio.sleep(0.0006) async def loop_for_recv_requests(self): while True: diff --git a/python/sglang/srt/managers/router/model_rpc.py b/python/sglang/srt/managers/router/model_rpc.py index 68f8423ed..6468c2d5f 100644 --- a/python/sglang/srt/managers/router/model_rpc.py +++ b/python/sglang/srt/managers/router/model_rpc.py @@ -90,7 +90,6 @@ class ModelRpcServer(rpyc.Service): tokenizer_mode=server_args.tokenizer_mode, trust_remote_code=server_args.trust_remote_code, ) - self.eos_token_id = self.tokenizer.eos_token_id self.max_total_num_token = self.model_runner.max_total_num_token self.max_num_running_seq = self.max_total_num_token // 2 self.max_prefill_num_token = max( diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 814141e1f..7947ca2ff 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -36,7 +36,6 @@ class ReqState: out_list: List finished: bool event: asyncio.Event - lock: asyncio.Lock global global_processor @@ -178,9 +177,8 @@ class TokenizerManager: ) self.send_to_router.send_pyobj(tokenized_obj) - lock = asyncio.Lock() event = asyncio.Event() - state = ReqState([], False, event, lock) + state = ReqState([], False, event) self.rid_to_state[rid] = state while True: @@ -221,9 +219,8 @@ class TokenizerManager: ) self.send_to_router.send_pyobj(tokenized_obj) - lock = asyncio.Lock() event = asyncio.Event() - state = ReqState([], False, event, lock) + state = ReqState([], False, event) self.rid_to_state[rid] = state output_list = []