Only stream output on tp rank 0 (#2124)
This commit is contained in:
@@ -134,8 +134,8 @@ class Scheduler:
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self.recv_from_tokenizer = None
|
self.recv_from_tokenizer = None
|
||||||
self.send_to_tokenizer = SimpleNamespace(send_pyobj=lambda x: None)
|
self.send_to_tokenizer = SimpleNamespace(send_pyobj=lambda _: None)
|
||||||
self.send_to_detokenizer = SimpleNamespace(send_pyobj=lambda x: None)
|
self.send_to_detokenizer = SimpleNamespace(send_pyobj=lambda _: None)
|
||||||
|
|
||||||
# Init tokenizer
|
# Init tokenizer
|
||||||
self.model_config = ModelConfig(
|
self.model_config = ModelConfig(
|
||||||
@@ -1028,7 +1028,8 @@ class Scheduler:
|
|||||||
else:
|
else:
|
||||||
self.tree_cache.cache_unfinished_req(req)
|
self.tree_cache.cache_unfinished_req(req)
|
||||||
|
|
||||||
self.stream_output(batch.reqs)
|
if self.tp_rank == 0:
|
||||||
|
self.stream_output(batch.reqs)
|
||||||
|
|
||||||
def process_batch_result_decode(self, batch: ScheduleBatch, result):
|
def process_batch_result_decode(self, batch: ScheduleBatch, result):
|
||||||
logits_output, next_token_ids, bid = result
|
logits_output, next_token_ids, bid = result
|
||||||
@@ -1079,7 +1080,8 @@ class Scheduler:
|
|||||||
torch.cuda.current_stream().synchronize()
|
torch.cuda.current_stream().synchronize()
|
||||||
batch.next_batch_sampling_info.sampling_info_done.set()
|
batch.next_batch_sampling_info.sampling_info_done.set()
|
||||||
|
|
||||||
self.stream_output(batch.reqs)
|
if self.tp_rank == 0:
|
||||||
|
self.stream_output(batch.reqs)
|
||||||
|
|
||||||
self.token_to_kv_pool.free_group_end()
|
self.token_to_kv_pool.free_group_end()
|
||||||
|
|
||||||
|
|||||||
@@ -179,7 +179,7 @@ class ModelRunner:
|
|||||||
if self.device == "cuda":
|
if self.device == "cuda":
|
||||||
torch.cuda.set_device(self.gpu_id)
|
torch.cuda.set_device(self.gpu_id)
|
||||||
backend = "nccl"
|
backend = "nccl"
|
||||||
# ToDO(liangan1):Just use gloo to bypass the initialization failure
|
# TODO(liangan1): Just use gloo to bypass the initialization failure
|
||||||
# Need to use xccl for xpu backend in the future
|
# Need to use xccl for xpu backend in the future
|
||||||
elif self.device == "xpu":
|
elif self.device == "xpu":
|
||||||
torch.xpu.set_device(self.gpu_id)
|
torch.xpu.set_device(self.gpu_id)
|
||||||
|
|||||||
Reference in New Issue
Block a user