feat: add dp attention support for Qwen 2/3 MoE models, fixes #6088 (#6121)

Co-authored-by: King.Zevin <zevin@mail.ustc.edu.cn>
Co-authored-by: Yi Zhang <1109276519@qq.com>
This commit is contained in:
Fr4nk1in
2025-05-17 05:44:10 +08:00
committed by GitHub
parent 6fc9357503
commit 4bd2952a37
4 changed files with 451 additions and 72 deletions

View File

@@ -142,16 +142,6 @@ def get_local_attention_dp_size():
return _LOCAL_ATTN_DP_SIZE
def get_local_attention_dp_rank():
    """Return the node-local data-parallel attention rank.

    Returns:
        The local DP attention rank recorded when DP attention was
        initialized (module global ``_LOCAL_ATTN_DP_RANK``).

    Raises:
        AssertionError: if DP attention has not been initialized yet.
    """
    # An `assert` statement is stripped under `python -O`, which would let an
    # uninitialized `None` leak to callers. Use an explicit check instead;
    # AssertionError is kept so any existing callers catching it still work.
    if _LOCAL_ATTN_DP_RANK is None:
        raise AssertionError("dp attention not initialized!")
    return _LOCAL_ATTN_DP_RANK
def get_local_attention_dp_size():
    """Return the node-local data-parallel attention world size.

    Returns:
        The local DP attention size recorded when DP attention was
        initialized (module global ``_LOCAL_ATTN_DP_SIZE``).

    Raises:
        AssertionError: if DP attention has not been initialized yet.
    """
    # An `assert` statement is stripped under `python -O`, which would let an
    # uninitialized `None` leak to callers. Use an explicit check instead;
    # AssertionError is kept so any existing callers catching it still work.
    if _LOCAL_ATTN_DP_SIZE is None:
        raise AssertionError("dp attention not initialized!")
    return _LOCAL_ATTN_DP_SIZE
@contextmanager
def disable_dp_size():
"""Patch the tp group temporarily until this function ends.