From e68a2b5b2ff5e4f7ccf63782ed5210fe1ceb8b31 Mon Sep 17 00:00:00 2001 From: Zilin Zhu Date: Sat, 18 Oct 2025 14:29:35 +0800 Subject: [PATCH] [RL] use cpu group to prepare_mlp_sync_batch_raw when the server is offloaded (#10152) --- python/sglang/bench_one_batch.py | 1 + python/sglang/srt/managers/scheduler.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/python/sglang/bench_one_batch.py b/python/sglang/bench_one_batch.py index 2dce0623a..5604495e3 100644 --- a/python/sglang/bench_one_batch.py +++ b/python/sglang/bench_one_batch.py @@ -320,6 +320,7 @@ def _maybe_prepare_mlp_sync_batch(batch: ScheduleBatch, model_runner): speculative_num_draft_tokens=None, require_mlp_tp_gather=require_mlp_tp_gather(model_runner.server_args), disable_overlap_schedule=model_runner.server_args.disable_overlap_schedule, + offload_tags=set(), ) diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index ad19af782..78457abc8 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -2339,6 +2339,7 @@ class Scheduler( speculative_num_draft_tokens=self.server_args.speculative_num_draft_tokens, require_mlp_tp_gather=require_mlp_tp_gather(self.server_args), disable_overlap_schedule=self.server_args.disable_overlap_schedule, + offload_tags=self.offload_tags, ) @staticmethod @@ -2353,6 +2354,7 @@ class Scheduler( speculative_num_draft_tokens, require_mlp_tp_gather: bool, disable_overlap_schedule: bool, + offload_tags: set[str], ): # Check if other DP workers have running batches if local_batch is None: @@ -2383,7 +2385,7 @@ class Scheduler( ) tbo_preparer = TboDPAttentionPreparer() - if disable_overlap_schedule: + if len(offload_tags) == 0 and disable_overlap_schedule: group = tp_group.device_group device = tp_group.device else: