Roll back to use vllm custom allreduce (#3006)

2025-01-20 04:03:15 -08:00
parent dc1881326f
commit 89cd923581
10 changed files with 18 additions and 65 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -63,8 +63,8 @@ from sglang.srt.utils import (
    init_custom_process_group,
    is_cuda,
    is_hip,
+    monkey_patch_p2p_access_check,
    monkey_patch_vllm_gguf_config,
-    monkey_patch_vllm_p2p_access_check,
    set_cpu_offload_max_bytes,
 )

@@ -229,7 +229,8 @@ class ModelRunner:
            backend = "gloo"

        if not self.server_args.enable_p2p_check:
-            monkey_patch_vllm_p2p_access_check(self.gpu_id)
+            monkey_patch_p2p_access_check()
+
        if self.server_args.dist_init_addr:
            dist_init_method = f"tcp://{self.server_args.dist_init_addr}"
        else: