From e1b63624d79d7153e85ae6fe884619e097ffc1bd Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Fri, 22 Nov 2024 15:13:44 -0800
Subject: [PATCH] Only stream output on tp rank 0 (#2124)

---
 python/sglang/srt/managers/scheduler.py          | 10 ++++++----
 python/sglang/srt/model_executor/model_runner.py |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index de3c753ef..2c5482b28 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -134,8 +134,8 @@ class Scheduler:
             )
         else:
             self.recv_from_tokenizer = None
-            self.send_to_tokenizer = SimpleNamespace(send_pyobj=lambda x: None)
-            self.send_to_detokenizer = SimpleNamespace(send_pyobj=lambda x: None)
+            self.send_to_tokenizer = SimpleNamespace(send_pyobj=lambda _: None)
+            self.send_to_detokenizer = SimpleNamespace(send_pyobj=lambda _: None)
 
         # Init tokenizer
         self.model_config = ModelConfig(
@@ -1028,7 +1028,8 @@ class Scheduler:
             else:
                 self.tree_cache.cache_unfinished_req(req)
 
-        self.stream_output(batch.reqs)
+        if self.tp_rank == 0:
+            self.stream_output(batch.reqs)
 
     def process_batch_result_decode(self, batch: ScheduleBatch, result):
         logits_output, next_token_ids, bid = result
@@ -1079,7 +1080,8 @@ class Scheduler:
             torch.cuda.current_stream().synchronize()
             batch.next_batch_sampling_info.sampling_info_done.set()
 
-        self.stream_output(batch.reqs)
+        if self.tp_rank == 0:
+            self.stream_output(batch.reqs)
 
         self.token_to_kv_pool.free_group_end()
 
diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index c2659f5b7..64fd79275 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -179,7 +179,7 @@ class ModelRunner:
         if self.device == "cuda":
             torch.cuda.set_device(self.gpu_id)
             backend = "nccl"
-            # ToDO(liangan1):Just use gloo to bypass the initilization fail
+            # TODO(liangan1): Just use gloo to bypass the initilization fail
             # Need to use xccl for xpu backend in the future
         elif self.device == "xpu":
             torch.xpu.set_device(self.gpu_id)