Integrating PD disaggregation with DP attention and DeepEP (#5435)

Co-authored-by: Byron Hsu <byronhsu1230@gmail.com>
This commit is contained in:
Cheng Wan
2025-04-23 01:46:01 -07:00
committed by GitHub
parent fbb5f229d4
commit 711efe7814
3 changed files with 72 additions and 8 deletions

View File

@@ -187,6 +187,14 @@ class SchedulerDisaggregationPrefillMixin:
)
self.process_prefill_chunk()
batch = self.get_new_batch_prefill()
# Prepare the batch for DP attention (this path is also taken when enable_sp_layernorm is set)
if (
self.server_args.enable_dp_attention
or self.server_args.enable_sp_layernorm
):
batch, _ = self.prepare_dp_attn_batch(batch)
self.cur_batch = batch
if batch:
@@ -217,6 +225,14 @@ class SchedulerDisaggregationPrefillMixin:
)
self.process_prefill_chunk()
batch = self.get_new_batch_prefill()
# Prepare the batch for DP attention (this path is also taken when enable_sp_layernorm is set)
if (
self.server_args.enable_dp_attention
or self.server_args.enable_sp_layernorm
):
batch, _ = self.prepare_dp_attn_batch(batch)
self.cur_batch = batch
if batch: