[RL] Use the CPU group in prepare_mlp_sync_batch_raw when the server is offloaded (#10152)

2025-10-18 14:29:35 +08:00
parent 31b9f19e54
commit e68a2b5b2f
2 changed files with 4 additions and 1 deletion
--- a/python/sglang/bench_one_batch.py
+++ b/python/sglang/bench_one_batch.py
@@ -320,6 +320,7 @@ def _maybe_prepare_mlp_sync_batch(batch: ScheduleBatch, model_runner):
            speculative_num_draft_tokens=None,
            require_mlp_tp_gather=require_mlp_tp_gather(model_runner.server_args),
            disable_overlap_schedule=model_runner.server_args.disable_overlap_schedule,
+            offload_tags=set(),
        )


--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -2339,6 +2339,7 @@ class Scheduler(
            speculative_num_draft_tokens=self.server_args.speculative_num_draft_tokens,
            require_mlp_tp_gather=require_mlp_tp_gather(self.server_args),
            disable_overlap_schedule=self.server_args.disable_overlap_schedule,
+            offload_tags=self.offload_tags,
        )

    @staticmethod
@@ -2353,6 +2354,7 @@ class Scheduler(
        speculative_num_draft_tokens,
        require_mlp_tp_gather: bool,
        disable_overlap_schedule: bool,
+        offload_tags: set[str],
    ):
        # Check if other DP workers have running batches
        if local_batch is None:
@@ -2383,7 +2385,7 @@ class Scheduler(
        )

        tbo_preparer = TboDPAttentionPreparer()
-        if disable_overlap_schedule:
+        if len(offload_tags) == 0 and disable_overlap_schedule:
            group = tp_group.device_group
            device = tp_group.device
        else: