From e68a2b5b2ff5e4f7ccf63782ed5210fe1ceb8b31 Mon Sep 17 00:00:00 2001
From: Zilin Zhu <zhuzilinallen@gmail.com>
Date: Sat, 18 Oct 2025 14:29:35 +0800
Subject: [PATCH] [RL] use the CPU group in prepare_mlp_sync_batch_raw when the
 server is offloaded (#10152)

---
 python/sglang/bench_one_batch.py        | 1 +
 python/sglang/srt/managers/scheduler.py | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/sglang/bench_one_batch.py b/python/sglang/bench_one_batch.py
index 2dce0623a..5604495e3 100644
--- a/python/sglang/bench_one_batch.py
+++ b/python/sglang/bench_one_batch.py
@@ -320,6 +320,7 @@ def _maybe_prepare_mlp_sync_batch(batch: ScheduleBatch, model_runner):
             speculative_num_draft_tokens=None,
             require_mlp_tp_gather=require_mlp_tp_gather(model_runner.server_args),
             disable_overlap_schedule=model_runner.server_args.disable_overlap_schedule,
+            offload_tags=set(),
         )
 
 
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index ad19af782..78457abc8 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -2339,6 +2339,7 @@ class Scheduler(
             speculative_num_draft_tokens=self.server_args.speculative_num_draft_tokens,
             require_mlp_tp_gather=require_mlp_tp_gather(self.server_args),
             disable_overlap_schedule=self.server_args.disable_overlap_schedule,
+            offload_tags=self.offload_tags,
         )
 
     @staticmethod
@@ -2353,6 +2354,7 @@ class Scheduler(
         speculative_num_draft_tokens,
         require_mlp_tp_gather: bool,
         disable_overlap_schedule: bool,
+        offload_tags: set[str],
     ):
         # Check if other DP workers have running batches
         if local_batch is None:
@@ -2383,7 +2385,7 @@ class Scheduler(
         )
 
         tbo_preparer = TboDPAttentionPreparer()
-        if disable_overlap_schedule:
+        if len(offload_tags) == 0 and disable_overlap_schedule:
             group = tp_group.device_group
             device = tp_group.device
         else: