Fix DeepSeek DP Attention + torch compile (#5367)

Co-authored-by: ispobock <ispobaoke@163.com>
This commit is contained in:
fzyzcjy
2025-04-14 16:07:58 +08:00
committed by GitHub
parent fc72871975
commit defede5073
3 changed files with 8 additions and 6 deletions

View File

@@ -192,8 +192,7 @@ def _dp_gather(
if local_tokens.shape[0] > 0 and (is_partial or get_attention_tp_rank() == 0):
assert (
-global_tokens.untyped_storage().data_ptr()
-!= local_tokens.untyped_storage().data_ptr()
+local_tokens.untyped_storage() is not global_tokens.untyped_storage()
), "aliasing between global_tokens and local_tokens not allowed"
memcpy_triton(
global_tokens, local_tokens, 0, local_start_pos, local_num_tokens, False
@@ -243,8 +242,7 @@ def dp_scatter(
assert global_tokens.is_contiguous()
if local_tokens.shape[0] > 0:
assert (
-local_tokens.untyped_storage().data_ptr()
-!= global_tokens.untyped_storage().data_ptr()
+local_tokens.untyped_storage() is not global_tokens.untyped_storage()
), "aliasing between local_tokens and global_tokens not allowed"
memcpy_triton(
local_tokens, global_tokens, 0, local_start_pos, local_num_tokens, True