[RL] use cpu group to prepare_mlp_sync_batch_raw when the server is offloaded (#10152)

This commit is contained in:
Zilin Zhu
2025-10-18 14:29:35 +08:00
committed by GitHub
parent 31b9f19e54
commit e68a2b5b2f
2 changed files with 4 additions and 1 deletion

View File

@@ -320,6 +320,7 @@ def _maybe_prepare_mlp_sync_batch(batch: ScheduleBatch, model_runner):
speculative_num_draft_tokens=None,
require_mlp_tp_gather=require_mlp_tp_gather(model_runner.server_args),
disable_overlap_schedule=model_runner.server_args.disable_overlap_schedule,
offload_tags=set(),
)

View File

@@ -2339,6 +2339,7 @@ class Scheduler(
speculative_num_draft_tokens=self.server_args.speculative_num_draft_tokens,
require_mlp_tp_gather=require_mlp_tp_gather(self.server_args),
disable_overlap_schedule=self.server_args.disable_overlap_schedule,
offload_tags=self.offload_tags,
)
@staticmethod
@@ -2353,6 +2354,7 @@ class Scheduler(
speculative_num_draft_tokens,
require_mlp_tp_gather: bool,
disable_overlap_schedule: bool,
offload_tags: set[str],
):
# Check if other DP workers have running batches
if local_batch is None:
@@ -2383,7 +2385,7 @@ class Scheduler(
)
tbo_preparer = TboDPAttentionPreparer()
-if disable_overlap_schedule:
+if len(offload_tags) == 0 and disable_overlap_schedule:
group = tp_group.device_group
device = tp_group.device
else: