[FA3 Attn Backend] Remove Unnecessary Device Sync for FA3 (#4745)
Co-authored-by: Yubo Wang <yubowang2019@gmail.com>
This commit is contained in:
@@ -1376,6 +1376,7 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
        if (
            global_server_args_dict["enable_flashinfer_mla"]
            or global_server_args_dict["enable_flashmla"]
            or global_server_args_dict["attention_backend"] == "fa3"
        ):
            decode_seq_lens = self.seq_lens.cpu()
        else:
Reference in New Issue
Block a user