From 1a57e4167915780c9ba458ff6f3ad5a18e048ee4 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Mon, 27 May 2024 23:00:16 -0700 Subject: [PATCH] do not launch workers in parallel --- python/sglang/srt/managers/controller/manager_multi.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/managers/controller/manager_multi.py b/python/sglang/srt/managers/controller/manager_multi.py index a3175c92e..83f45b9a8 100644 --- a/python/sglang/srt/managers/controller/manager_multi.py +++ b/python/sglang/srt/managers/controller/manager_multi.py @@ -86,8 +86,12 @@ class Controller: f"Failed to start local worker {i}\n{get_exception_traceback()}" ) - with ThreadPoolExecutor(server_args.dp_size) as executor: - executor.map(start_dp_worker, range(server_args.dp_size)) + for i in range(server_args.dp_size): + start_dp_worker(i) + + # Parallel launch is slower, probably due to the disk bandwidth limitations. + # with ThreadPoolExecutor(server_args.dp_size) as executor: + # executor.map(start_dp_worker, range(server_args.dp_size)) def have_any_live_worker(self): return any(worker_thread.liveness for worker_thread in self.workers.values())