[FA3 Attn Backend] Remove Unnecessary Device Sync for FA3 (#4745)

Co-authored-by: Yubo Wang <yubowang2019@gmail.com>
This commit is contained in:
Stefan He
2025-03-27 00:45:11 -07:00
committed by GitHub
parent 92bb49a7f9
commit 1b9175cb23
2 changed files with 17 additions and 10 deletions

View File

@@ -1376,6 +1376,7 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
if (
global_server_args_dict["enable_flashinfer_mla"]
or global_server_args_dict["enable_flashmla"]
or global_server_args_dict["attention_backend"] == "fa3"
):
decode_seq_lens = self.seq_lens.cpu()
else: