Fix accuracy drop in dsv3 runs when DP attention is enabled (#8677)
Co-authored-by: wunhuang <wunhuang@amd.com>
This commit is contained in:
@@ -1085,7 +1085,13 @@ class DeepseekV2AttentionMLA(nn.Module):
|
||||
and not forward_batch.forward_mode.is_target_verify()
|
||||
and not forward_batch.forward_mode.is_draft_extend()
|
||||
):
|
||||
return AttnForwardMethod.MHA
|
||||
if is_dp_attention_enabled():
|
||||
if sum(forward_batch.extend_prefix_lens_cpu) == 0:
|
||||
return AttnForwardMethod.MHA
|
||||
else:
|
||||
return AttnForwardMethod.MLA
|
||||
else:
|
||||
return AttnForwardMethod.MHA
|
||||
else:
|
||||
return AttnForwardMethod.MLA
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user