[Feature] Comprehensive Hybrid Parallelism Support (#6389)
@@ -165,7 +165,8 @@ def disable_dp_size():
 
 
 def get_dp_local_info(forward_batch: ForwardBatch):
-    dp_rank = get_local_attention_dp_rank()
+    # `get_dp_local_info` is only called in global DP gather and scatter. We use global DP rank here.
+    dp_rank = get_attention_dp_rank()
 
     if forward_batch.dp_local_start_pos is None:
         cumtokens = torch.cumsum(forward_batch.global_num_tokens_gpu, dim=0)
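For context, a minimal sketch (not the repository's actual implementation) of how a rank's local token slice could be derived from per-rank token counts during a global DP gather/scatter; the helper name `local_token_slice` and its arguments are hypothetical, while `torch.cumsum` over the per-rank counts mirrors the `global_num_tokens_gpu` usage in the diff above:

import torch

def local_token_slice(global_num_tokens: torch.Tensor, dp_rank: int):
    # Hypothetical sketch: compute this rank's start offset and token count
    # from a 1-D tensor of per-rank token counts, indexed by the *global*
    # attention DP rank (as the changed line above uses).
    cumtokens = torch.cumsum(global_num_tokens, dim=0)
    start = cumtokens[dp_rank - 1] if dp_rank > 0 else torch.zeros_like(cumtokens[0])
    num_tokens = global_num_tokens[dp_rank]
    return start, num_tokens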