feat: add dp attention support for Qwen 2/3 MoE models, fixes #6088 (#6121)

Co-authored-by: King.Zevin <zevin@mail.ustc.edu.cn>
Co-authored-by: Yi Zhang <1109276519@qq.com>
This commit is contained in:
Fr4nk1in
2025-05-17 05:44:10 +08:00
committed by GitHub
parent 6fc9357503
commit 4bd2952a37
4 changed files with 451 additions and 72 deletions

View File

@@ -142,16 +142,6 @@ def get_local_attention_dp_size():
return _LOCAL_ATTN_DP_SIZE
def get_local_attention_dp_rank():
    """Return the node-local data-parallel attention rank.

    Returns:
        The local DP attention rank recorded when DP attention was
        initialized (module global ``_LOCAL_ATTN_DP_RANK``).

    Raises:
        AssertionError: if DP attention has not been initialized yet.
    """
    # An `assert` statement is stripped under `python -O`, which would let an
    # uninitialized `None` leak to callers. Use an explicit check instead;
    # AssertionError is kept so any existing callers catching it still work.
    if _LOCAL_ATTN_DP_RANK is None:
        raise AssertionError("dp attention not initialized!")
    return _LOCAL_ATTN_DP_RANK
def get_local_attention_dp_size():
    """Return the node-local data-parallel attention world size.

    Returns:
        The local DP attention size recorded when DP attention was
        initialized (module global ``_LOCAL_ATTN_DP_SIZE``).

    Raises:
        AssertionError: if DP attention has not been initialized yet.
    """
    # An `assert` statement is stripped under `python -O`, which would let an
    # uninitialized `None` leak to callers. Use an explicit check instead;
    # AssertionError is kept so any existing callers catching it still work.
    if _LOCAL_ATTN_DP_SIZE is None:
        raise AssertionError("dp attention not initialized!")
    return _LOCAL_ATTN_DP_SIZE
@contextmanager
def disable_dp_size():
"""Patch the tp group temporarily until this function ends.