Roll back to use vllm custom allreduce (#3006)

This commit is contained in:
Lianmin Zheng
2025-01-20 04:03:15 -08:00
committed by GitHub
parent dc1881326f
commit 89cd923581
10 changed files with 18 additions and 65 deletions

View File

@@ -63,8 +63,8 @@ from sglang.srt.utils import (
init_custom_process_group,
is_cuda,
is_hip,
monkey_patch_p2p_access_check,
monkey_patch_vllm_gguf_config,
monkey_patch_vllm_p2p_access_check,
set_cpu_offload_max_bytes,
)
@@ -229,7 +229,8 @@ class ModelRunner:
backend = "gloo"
if not self.server_args.enable_p2p_check:
monkey_patch_vllm_p2p_access_check(self.gpu_id)
monkey_patch_p2p_access_check()
if self.server_args.dist_init_addr:
dist_init_method = f"tcp://{self.server_args.dist_init_addr}"
else: