Integrating PD disaggregation with DP attention and DeepEP (#5435)

Co-authored-by: Byron Hsu <byronhsu1230@gmail.com>
2025-04-23 01:46:01 -07:00
parent fbb5f229d4
commit 711efe7814
3 changed files with 72 additions and 8 deletions
--- a/python/sglang/srt/disaggregation/prefill.py
+++ b/python/sglang/srt/disaggregation/prefill.py
@@ -187,6 +187,14 @@ class SchedulerDisaggregationPrefillMixin:
            )
            self.process_prefill_chunk()
            batch = self.get_new_batch_prefill()
+
+            # Prepare the DP-attention batch; also runs when SP layernorm is enabled
+            if (
+                self.server_args.enable_dp_attention
+                or self.server_args.enable_sp_layernorm
+            ):
+                batch, _ = self.prepare_dp_attn_batch(batch)
+
            self.cur_batch = batch

            if batch:
@@ -217,6 +225,14 @@ class SchedulerDisaggregationPrefillMixin:
            )
            self.process_prefill_chunk()
            batch = self.get_new_batch_prefill()
+
+            # Prepare the DP-attention batch; also runs when SP layernorm is enabled
+            if (
+                self.server_args.enable_dp_attention
+                or self.server_args.enable_sp_layernorm
+            ):
+                batch, _ = self.prepare_dp_attn_batch(batch)
+
            self.cur_batch = batch

            if batch: