Support 1-shot allreduce on 1-node and 2-node setups using mscclpp (#6277)
This commit is contained in:
@@ -35,6 +35,7 @@ from sglang.srt.distributed import (
|
||||
init_distributed_environment,
|
||||
initialize_model_parallel,
|
||||
set_custom_all_reduce,
|
||||
set_mscclpp_all_reduce,
|
||||
)
|
||||
from sglang.srt.distributed.parallel_state import monkey_patch_vllm_parallel_state
|
||||
from sglang.srt.layers.attention.tbo_backend import TboAttnBackend
|
||||
@@ -460,6 +461,7 @@ class ModelRunner:
|
||||
else:
|
||||
dist_init_method = f"tcp://127.0.0.1:{self.dist_port}"
|
||||
set_custom_all_reduce(not self.server_args.disable_custom_all_reduce)
|
||||
set_mscclpp_all_reduce(self.server_args.enable_mscclpp)
|
||||
|
||||
if not self.is_draft_worker:
|
||||
# Only initialize the distributed environment on the target model worker.
|
||||
|
||||
Reference in New Issue
Block a user