Deprecate enable-flashinfer-mla and enable-flashmla (#5480)
@@ -76,7 +76,6 @@ global_server_args_dict = {
     "device": ServerArgs.device,
     "speculative_accept_threshold_single": ServerArgs.speculative_accept_threshold_single,
     "speculative_accept_threshold_acc": ServerArgs.speculative_accept_threshold_acc,
-    "enable_flashmla": ServerArgs.enable_flashmla,
     "disable_radix_cache": ServerArgs.disable_radix_cache,
     "flashinfer_mla_disable_ragged": ServerArgs.flashinfer_mla_disable_ragged,
     "chunked_prefill_size": ServerArgs.chunked_prefill_size,
@@ -1480,7 +1479,7 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
                 global_server_args_dict["use_mla_backend"]
                 and global_server_args_dict["attention_backend"] == "flashinfer"
             )
-            or global_server_args_dict["enable_flashmla"]
+            or global_server_args_dict["attention_backend"] == "flashmla"
             or global_server_args_dict["attention_backend"] == "fa3"
         ):
             seq_lens_cpu = self.seq_lens.cpu()