Support (1 <= dp < tp) in the dp attention in DeepEP (#4770)
Co-authored-by: Cheng Wan <cwan39@gatech.edu>
This commit is contained in:
@@ -1186,7 +1186,7 @@ class Scheduler(
|
||||
ret = None
|
||||
|
||||
# Handle DP attention
|
||||
- if self.server_args.enable_dp_attention:
|
||||
+ if self.server_args.enable_dp_attention or self.server_args.enable_sp_layernorm:
|
||||
ret, _ = self.prepare_dp_attn_batch(ret)
|
||||
|
||||
return ret
|
||||
|
||||
Reference in New Issue
Block a user