Replace enable_flashinfer_mla argument with attention_backend (#5005)

This commit is contained in:
Baizhou Zhang
2025-04-03 02:53:58 -07:00
committed by GitHub
parent 772d2a191d
commit e8999b13b7
8 changed files with 21 additions and 20 deletions

View File

@@ -76,7 +76,6 @@ global_server_args_dict = {
     "device": ServerArgs.device,
     "speculative_accept_threshold_single": ServerArgs.speculative_accept_threshold_single,
     "speculative_accept_threshold_acc": ServerArgs.speculative_accept_threshold_acc,
-    "enable_flashinfer_mla": ServerArgs.enable_flashinfer_mla,
     "enable_flashmla": ServerArgs.enable_flashmla,
     "disable_radix_cache": ServerArgs.disable_radix_cache,
     "flashinfer_mla_disable_ragged": ServerArgs.flashinfer_mla_disable_ragged,
@@ -1435,7 +1434,7 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
         # Create seq_lens_cpu when needed
         if (
-            global_server_args_dict["enable_flashinfer_mla"]
+            global_server_args_dict["attention_backend"] == "flashinfer_mla"
             or global_server_args_dict["enable_flashmla"]
             or global_server_args_dict["attention_backend"] == "fa3"
         ):